From f7d15472e55ae5695ea8015a6acee54914bd2a1e Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Fri, 6 Oct 2023 11:29:59 +0200 Subject: [PATCH 01/13] feat: implement "strucvars ingest" command (#200) --- README.md | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/README.md b/README.md index 992fa3dd..74b8da60 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,8 @@ The `seqvars ingest` command will annotate the variants with the following infor - gnomAD genomes and exomes allele frequencies - gnomAD-mtDNA and HelixMtDb allele frequencies - functional annotation following the [VCF ANN field standard](https://pcingola.github.io/SnpEff/adds/VCFannotationformat_v1.0.pdf) + - `Gene_Name` is written as HGNC symbol + - `Gene_ID` is written as HGNC ID The command will emit one output line for each variant allele from the input and each affected gene. That is, if two variant alleles affect two genes, four records will be written to the output file. @@ -127,6 +129,94 @@ Overall, the command will emit the following header rows in addition to the `##c > [!NOTE] > Future versions of the worker will annotate the worst effect on a MANE select or MANE Clinical transcript. +## The `strucvars ingest` Command + +This command takes as the input one or more VCF files from structural variant callers and converts them into a file for further querying. +The command supports the following variant callers and can guess the caller from the VCF header and first record. + +- Delly2 +- Dragen-SV (equivalent to Manta) +- Dragen-CNV +- GATK gCNV +- Manta +- MELT +- PopDel + +One record will be written out for each variant, each with a single alternate allele. 
+ +The following symbolic `ALT` alleles are used: + +- `` +- `` +- `` +- `` +- VCF break-end syntax, e.g., `T[chr1:5[` + +The following `INFO` fields are written: + +- `IMPRECISE` -- flag that specifies that this is an imprecise variant +- `END` -- end position of the variant +- `SVTYPE` -- type of the variant, one of ``, ``, ``, ``, `BND` +- `SVLEN` -- absolute length of the SV for linear variants, `.` for non-linear variants +- `SVCLAIM` -- specification of `D` (change in abundance), `J` (novel junction), or `DJ` (both change in abundance and novel junction) +- `chr2` -- (non-standard field), second chromosome for BND variants +- `annsv` -- (non-standard field), annotation of the variant effect on each affected gene + +The `annsv` field is a pipe-character (`|`) separated list of the following fields: + +1. symbolic alternate allele, e.g., `` +2. effects on the gene's transcript, separated by `&` + - `transcript_variant` -- variant affects the whole transcript + - `exon_variant` -- variant affects exon + - `splice_region_variant` -- variant affects splice region + - `intron_variant` -- variant affects only intron + - `upstream_variant` -- variant upstream of gene + - `downstream_variant` -- variant downstream of gene + - `intergenic_variant` -- default for "no gene affected", but never written +3. HGNC gene symbol, e.g., `BRCA1` +4. 
HGNC gene ID, e.g., `HGNC:1100` + +The following `FORMAT` fields are written: + +- `GT` -- (standard field) genotype, if applicable +- `GQ` -- (standard field) genotype quality, if applicable +- `pec` -- total coverage with paired-end reads +- `pev` -- paired-end reads supporting the variant +- `src` -- total coverage with split reads +- `srv` -- split reads supporting the variant +- `amq` -- average mapping quality over the variant +- `cn` -- copy number of the variant in the sample +- `anc` -- average normalized coverage over the variant in the sample +- `pc` -- point count (windows/targets/probes) + + +Overall, the command will emit the following header rows in addition to the `##contig=` lines. + +``` +##fileformat=VCFv4.2 +##FILTER= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##x-varfish-version= +##x-varfish-version= +##x-varfish-version= +``` + # Developer Information This section is only relevant for developers of `varfish-server-worker`. 
From 4501696f976a62cbf344b2a3f56e6d8ce20f8267 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Fri, 6 Oct 2023 14:09:47 +0200 Subject: [PATCH 02/13] wip --- README.md | 33 +- src/common.rs | 139 ++++++- src/db/mk_inhouse/cli.rs | 2 +- src/db/mk_inhouse/input.rs | 2 +- src/db/mk_inhouse/output.rs | 2 +- src/db/to_bin/clinvar/input.rs | 2 +- src/db/to_bin/clinvar/mod.rs | 4 +- src/db/to_bin/vardbs/input.rs | 2 +- src/db/to_bin/vardbs/mod.rs | 2 +- src/main.rs | 24 +- src/seqvars/ingest/mod.rs | 11 +- src/strucvars/ingest/header.rs | 343 ++++++++++++++++++ src/strucvars/ingest/mod.rs | 89 +++++ ...build_output_header_37@delly2-min.vcf.snap | 58 +++ ...d_output_header_37@dragen-cnv-min.vcf.snap | 54 +++ ...ld_output_header_37@dragen-sv-min.vcf.snap | 54 +++ ...__build_output_header_37@gcnv-min.vcf.snap | 54 +++ ..._build_output_header_37@manta-min.vcf.snap | 54 +++ ...__build_output_header_37@melt-min.vcf.snap | 54 +++ ...build_output_header_37@popdel-min.vcf.snap | 58 +++ ...build_output_header_38@delly2-min.vcf.snap | 58 +++ ...d_output_header_38@dragen-cnv-min.vcf.snap | 54 +++ ...ld_output_header_38@dragen-sv-min.vcf.snap | 54 +++ ...__build_output_header_38@gcnv-min.vcf.snap | 54 +++ ..._build_output_header_38@manta-min.vcf.snap | 54 +++ ...__build_output_header_38@melt-min.vcf.snap | 54 +++ ...build_output_header_38@popdel-min.vcf.snap | 58 +++ src/strucvars/mod.rs | 2 + src/{sv => strucvars}/query/bgdbs.rs | 0 src/{sv => strucvars}/query/clinvar.rs | 0 src/{sv => strucvars}/query/genes.rs | 0 src/{sv => strucvars}/query/interpreter.rs | 2 +- src/{sv => strucvars}/query/masked.rs | 10 +- src/{sv => strucvars}/query/mod.rs | 2 +- src/{sv => strucvars}/query/pathogenic.rs | 0 src/{sv => strucvars}/query/records.rs | 0 src/{sv => strucvars}/query/schema.rs | 0 ..._masked__test__load_masked_db_records.snap | 113 ++++++ ...masked__test__masked_db_fetch_records.snap | 13 + ..._schema__tests__call_info_serde_smoke.snap | 18 + 
...schema__tests__case_query_serde_smoke.snap | 77 ++++ ..._tests__genotype_criteria_serde_smoke.snap | 51 +++ ...tests__structural_variant_serde_smoke.snap | 14 + ..._masked__test__load_masked_db_records.snap | 0 ...masked__test__masked_db_fetch_records.snap | 0 ..._schema__tests__call_info_serde_smoke.snap | 0 ...schema__tests__case_query_serde_smoke.snap | 0 ..._tests__genotype_criteria_serde_smoke.snap | 0 ...tests__structural_variant_serde_smoke.snap | 0 src/{sv => strucvars}/query/tads.rs | 0 src/sv/mod.rs | 1 - tests/db/strucvars/ingest/delly2-min.ped | 3 + tests/db/strucvars/ingest/delly2-min.vcf | 132 +++++++ tests/db/strucvars/ingest/dragen-cnv-min.ped | 1 + tests/db/strucvars/ingest/dragen-cnv-min.vcf | 48 +++ tests/db/strucvars/ingest/dragen-sv-min.ped | 1 + tests/db/strucvars/ingest/dragen-sv-min.vcf | 133 +++++++ tests/db/strucvars/ingest/gcnv-min.ped | 1 + tests/db/strucvars/ingest/gcnv-min.vcf | 112 ++++++ tests/db/strucvars/ingest/manta-min.ped | 1 + tests/db/strucvars/ingest/manta-min.vcf | 128 +++++++ tests/db/strucvars/ingest/melt-min.ped | 1 + tests/db/strucvars/ingest/melt-min.vcf | 55 +++ tests/db/strucvars/ingest/popdel-min.ped | 3 + tests/db/strucvars/ingest/popdel-min.vcf | 121 ++++++ 65 files changed, 2419 insertions(+), 51 deletions(-) create mode 100644 src/strucvars/ingest/header.rs create mode 100644 src/strucvars/ingest/mod.rs create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap create mode 
100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap create mode 100644 src/strucvars/mod.rs rename src/{sv => strucvars}/query/bgdbs.rs (100%) rename src/{sv => strucvars}/query/clinvar.rs (100%) rename src/{sv => strucvars}/query/genes.rs (100%) rename src/{sv => strucvars}/query/interpreter.rs (99%) rename src/{sv => strucvars}/query/masked.rs (96%) rename src/{sv => strucvars}/query/mod.rs (99%) rename src/{sv => strucvars}/query/pathogenic.rs (100%) rename src/{sv => strucvars}/query/records.rs (100%) rename src/{sv => strucvars}/query/schema.rs (100%) create mode 
100644 src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__masked__test__load_masked_db_records.snap create mode 100644 src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__masked__test__masked_db_fetch_records.snap create mode 100644 src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__call_info_serde_smoke.snap create mode 100644 src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__case_query_serde_smoke.snap create mode 100644 src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__genotype_criteria_serde_smoke.snap create mode 100644 src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__structural_variant_serde_smoke.snap rename src/{sv => strucvars}/query/snapshots/varfish_server_worker__sv__query__masked__test__load_masked_db_records.snap (100%) rename src/{sv => strucvars}/query/snapshots/varfish_server_worker__sv__query__masked__test__masked_db_fetch_records.snap (100%) rename src/{sv => strucvars}/query/snapshots/varfish_server_worker__sv__query__schema__tests__call_info_serde_smoke.snap (100%) rename src/{sv => strucvars}/query/snapshots/varfish_server_worker__sv__query__schema__tests__case_query_serde_smoke.snap (100%) rename src/{sv => strucvars}/query/snapshots/varfish_server_worker__sv__query__schema__tests__genotype_criteria_serde_smoke.snap (100%) rename src/{sv => strucvars}/query/snapshots/varfish_server_worker__sv__query__schema__tests__structural_variant_serde_smoke.snap (100%) rename src/{sv => strucvars}/query/tads.rs (100%) delete mode 100644 src/sv/mod.rs create mode 100644 tests/db/strucvars/ingest/delly2-min.ped create mode 100644 tests/db/strucvars/ingest/delly2-min.vcf create mode 100644 tests/db/strucvars/ingest/dragen-cnv-min.ped create mode 100644 tests/db/strucvars/ingest/dragen-cnv-min.vcf create mode 100644 tests/db/strucvars/ingest/dragen-sv-min.ped create mode 100644 
tests/db/strucvars/ingest/dragen-sv-min.vcf create mode 100644 tests/db/strucvars/ingest/gcnv-min.ped create mode 100644 tests/db/strucvars/ingest/gcnv-min.vcf create mode 100644 tests/db/strucvars/ingest/manta-min.ped create mode 100644 tests/db/strucvars/ingest/manta-min.vcf create mode 100644 tests/db/strucvars/ingest/melt-min.ped create mode 100644 tests/db/strucvars/ingest/melt-min.vcf create mode 100644 tests/db/strucvars/ingest/popdel-min.ped create mode 100644 tests/db/strucvars/ingest/popdel-min.vcf diff --git a/README.md b/README.md index 74b8da60..405bea95 100644 --- a/README.md +++ b/README.md @@ -193,28 +193,33 @@ The following `FORMAT` fields are written: Overall, the command will emit the following header rows in addition to the `##contig=` lines. ``` -##fileformat=VCFv4.2 -##FILTER= +##fileformat=VCFv4.4 ##INFO= -##INFO= ##INFO= ##INFO= ##INFO= -##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= ##FORMAT= -##FORMAT= ##FORMAT= -##FORMAT= +##FORMAT= ##FORMAT= -##FORMAT= -##FORMAT= +##FORMAT= +##FORMAT= ##FORMAT= -##FORMAT= -##FORMAT= -##x-varfish-version= -##x-varfish-version= -##x-varfish-version= +##FORMAT= +##FORMAT= +##SAMPLE= +##SAMPLE= +##SAMPLE= +##PEDIGREE= +##PEDIGREE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= ``` # Developer Information diff --git a/src/common.rs b/src/common.rs index fee69574..7a2da75e 100644 --- a/src/common.rs +++ b/src/common.rs @@ -8,12 +8,12 @@ use std::{ }; use byte_unit::Byte; -use clap_verbosity_flag::{InfoLevel, Verbosity}; - use clap::Parser; +use clap_verbosity_flag::{InfoLevel, Verbosity}; use flate2::{bufread::MultiGzDecoder, write::GzEncoder, Compression}; use hgvs::static_data::Assembly; use indexmap::IndexMap; +use noodles_vcf as vcf; /// Commonly used command line arguments. #[derive(Parser, Debug)] @@ -386,3 +386,138 @@ mod test { Ok(()) } } + +/// Return the version of the `varfish-server-worker` crate and `x.y.z` in tests. 
+pub fn worker_version() -> &'static str { + if cfg!(test) { + "x.y.z" + } else { + env!("CARGO_PKG_VERSION") + } +} + +/// Add contigs for GRCh37. +pub fn add_contigs_37( + builder: vcf::header::Builder, +) -> Result { + use vcf::header::record::value::map::Contig; + use vcf::header::record::value::Map; + + let mut builder = builder; + + let specs: &[(&str, usize); 25] = &[ + ("1", 249250621), + ("2", 243199373), + ("3", 198022430), + ("4", 191154276), + ("5", 180915260), + ("6", 171115067), + ("7", 159138663), + ("8", 146364022), + ("9", 141213431), + ("10", 135534747), + ("11", 135006516), + ("12", 133851895), + ("13", 115169878), + ("14", 107349540), + ("15", 102531392), + ("16", 90354753), + ("17", 81195210), + ("18", 78077248), + ("19", 59128983), + ("20", 63025520), + ("21", 48129895), + ("22", 51304566), + ("X", 155270560), + ("Y", 59373566), + ("MT", 16569), + ]; + + for (contig, length) in specs { + builder = builder.add_contig( + contig + .parse() + .map_err(|_| anyhow::anyhow!("invalid contig: {}", contig))?, + Map::::builder() + .set_length(*length) + .insert( + "assembly" + .parse() + .map_err(|_| anyhow::anyhow!("invalid key: assembly"))?, + "GRCh37", + ) + .insert( + "species" + .parse() + .map_err(|_| anyhow::anyhow!("invalid key: species"))?, + "Homo sapiens", + ) + .build()?, + ); + } + + Ok(builder) +} + +/// Add contigs for GRCh38. 
+pub fn add_contigs_38( + builder: vcf::header::Builder, +) -> Result { + use vcf::header::record::value::map::Contig; + use vcf::header::record::value::Map; + + let mut builder = builder; + + let specs: &[(&str, usize); 25] = &[ + ("chr1", 248956422), + ("chr2", 242193529), + ("chr3", 198295559), + ("chr4", 190214555), + ("chr5", 181538259), + ("chr6", 170805979), + ("chr7", 159345973), + ("chr8", 145138636), + ("chr9", 138394717), + ("chr10", 133797422), + ("chr11", 135086622), + ("chr12", 133275309), + ("chr13", 114364328), + ("chr14", 107043718), + ("chr15", 101991189), + ("chr16", 90338345), + ("chr17", 83257441), + ("chr18", 80373285), + ("chr19", 58617616), + ("chr20", 64444167), + ("chr21", 46709983), + ("chr22", 50818468), + ("chrX", 156040895), + ("chrY", 57227415), + ("chrM", 16569), + ]; + + for (contig, length) in specs { + builder = builder.add_contig( + contig + .parse() + .map_err(|_| anyhow::anyhow!("invalid contig: {}", contig))?, + Map::::builder() + .set_length(*length) + .insert( + "assembly" + .parse() + .map_err(|_| anyhow::anyhow!("invalid key: assembly"))?, + "GRCh38", + ) + .insert( + "species" + .parse() + .map_err(|_| anyhow::anyhow!("invalid key: species"))?, + "Homo sapiens", + ) + .build()?, + ); + } + + Ok(builder) +} diff --git a/src/db/mk_inhouse/cli.rs b/src/db/mk_inhouse/cli.rs index e0219b4a..425acf19 100644 --- a/src/db/mk_inhouse/cli.rs +++ b/src/db/mk_inhouse/cli.rs @@ -20,7 +20,7 @@ use crate::{ build_chrom_map, open_read_maybe_gz, open_write_maybe_gz, read_lines, trace_rss_now, GenomeRelease, CHROMS, }, - sv::query::schema::SvType, + strucvars::query::schema::SvType, }; /// Create one file with records for each chromosome and SV type. 
diff --git a/src/db/mk_inhouse/input.rs b/src/db/mk_inhouse/input.rs index 6605e4c2..b8f874ca 100644 --- a/src/db/mk_inhouse/input.rs +++ b/src/db/mk_inhouse/input.rs @@ -2,7 +2,7 @@ use serde::{de::IntoDeserializer, Deserialize, Deserializer, Serialize}; -use crate::sv::query::schema::{StrandOrientation, SvType}; +use crate::strucvars::query::schema::{StrandOrientation, SvType}; /// Representation of the fields from the `StructuralVariant` table from /// VarFish Server that we need for building the background records. diff --git a/src/db/mk_inhouse/output.rs b/src/db/mk_inhouse/output.rs index 8aa0ac7f..19c45d11 100644 --- a/src/db/mk_inhouse/output.rs +++ b/src/db/mk_inhouse/output.rs @@ -2,7 +2,7 @@ use serde::{Deserialize, Serialize}; -use crate::sv::query::schema::{StrandOrientation, SvType}; +use crate::strucvars::query::schema::{StrandOrientation, SvType}; use super::input::Record as InputRecord; diff --git a/src/db/to_bin/clinvar/input.rs b/src/db/to_bin/clinvar/input.rs index ed05d702..6e714cae 100644 --- a/src/db/to_bin/clinvar/input.rs +++ b/src/db/to_bin/clinvar/input.rs @@ -3,7 +3,7 @@ //! Note that not the full model is implemented, only the parts that are needed for the //! conversion of the ClinVar structural variants. -use crate::sv::query::clinvar::pbs::{Pathogenicity, VariationType}; +use crate::strucvars::query::clinvar::pbs::{Pathogenicity, VariationType}; /// Accession of a ClinVar record. 
#[derive(Debug, serde::Deserialize, serde::Serialize)] diff --git a/src/db/to_bin/clinvar/mod.rs b/src/db/to_bin/clinvar/mod.rs index 14099bd7..459dd6ae 100644 --- a/src/db/to_bin/clinvar/mod.rs +++ b/src/db/to_bin/clinvar/mod.rs @@ -7,7 +7,7 @@ use thousands::Separable; use crate::{ common::{build_chrom_map, open_read_maybe_gz, trace_rss_now}, - sv::query::clinvar::pbs::{Pathogenicity, SvDatabase, SvRecord}, + strucvars::query::clinvar::pbs::{Pathogenicity, SvDatabase, SvRecord}, }; pub mod input; @@ -71,7 +71,7 @@ fn convert_jsonl_to_protobuf( for measure in &record.reference_clinvar_assertion.measures.measures { // convert from JSONL to protocolbuffers: variation type let variation_type: Result< - crate::sv::query::clinvar::pbs::VariationType, + crate::strucvars::query::clinvar::pbs::VariationType, anyhow::Error, > = measure.r#type.try_into(); let variation_type = if let Ok(variation_type) = variation_type { diff --git a/src/db/to_bin/vardbs/input.rs b/src/db/to_bin/vardbs/input.rs index a2bfa6d9..45472f11 100644 --- a/src/db/to_bin/vardbs/input.rs +++ b/src/db/to_bin/vardbs/input.rs @@ -5,7 +5,7 @@ use serde::Deserialize; use tracing::error; use crate::db::mk_inhouse::output::Record as InhouseDbRecord; -use crate::sv::query::schema::SvType; +use crate::strucvars::query::schema::SvType; /// dbVar database record as read from TSV file. 
#[derive(Debug, Deserialize)] diff --git a/src/db/to_bin/vardbs/mod.rs b/src/db/to_bin/vardbs/mod.rs index 4239d6ab..102e4dcb 100644 --- a/src/db/to_bin/vardbs/mod.rs +++ b/src/db/to_bin/vardbs/mod.rs @@ -13,7 +13,7 @@ use crate::common::{build_chrom_map, open_read_maybe_gz, trace_rss_now}; use crate::db; use crate::db::mk_inhouse::output::Record as InhouseDbRecord; use crate::db::pbs::{BackgroundDatabase, BgDbRecord}; -use crate::sv::query::schema::SvType; +use crate::strucvars::query::schema::SvType; use self::input::InputRecord; diff --git a/src/main.rs b/src/main.rs index 6ea9bca6..7badf899 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,7 @@ pub mod common; pub mod db; pub mod seqvars; -pub mod sv; +pub mod strucvars; use clap::{Args, Parser, Subcommand}; use console::{Emoji, Term}; @@ -32,8 +32,8 @@ struct Cli { enum Commands { /// Database building related commands. Db(Db), - /// SV filtration related commands. - Sv(Sv), + /// Structural variant related commands. + Strucvars(Strucvars), /// Sequence variant related commands. Seqvars(Seqvars), } @@ -58,16 +58,17 @@ enum DbCommands { /// Parsing of "sv *" sub commands. #[derive(Debug, Args)] #[command(args_conflicts_with_subcommands = true)] -struct Sv { +struct Strucvars { /// The sub command to run #[command(subcommand)] - command: SvCommands, + command: StrucvarsCommands, } /// Enum supporting the parsing of "sv *" sub commands. #[derive(Debug, Subcommand)] -enum SvCommands { - Query(sv::query::Args), +enum StrucvarsCommands { + Ingest(strucvars::ingest::Args), + Query(strucvars::query::Args), } /// Parsing of "seqvars *" sub commands. 
@@ -121,9 +122,12 @@ fn main() -> Result<(), anyhow::Error> { seqvars::ingest::run(&cli.common, args)?; } }, - Commands::Sv(sv) => match &sv.command { - SvCommands::Query(args) => { - sv::query::run(&cli.common, args)?; + Commands::Strucvars(strucvars) => match &strucvars.command { + StrucvarsCommands::Ingest(args) => { + strucvars::ingest::run(&cli.common, args)?; + } + StrucvarsCommands::Query(args) => { + strucvars::query::run(&cli.common, args)?; } }, } diff --git a/src/seqvars/ingest/mod.rs b/src/seqvars/ingest/mod.rs index 27a67322..f6dbd9f5 100644 --- a/src/seqvars/ingest/mod.rs +++ b/src/seqvars/ingest/mod.rs @@ -2,7 +2,7 @@ use std::sync::{Arc, OnceLock}; -use crate::common::{self, open_read_maybe_gz, open_write_maybe_gz, GenomeRelease}; +use crate::common::{self, open_read_maybe_gz, open_write_maybe_gz, worker_version, GenomeRelease}; use mehari::annotate::seqvars::provider::MehariProvider; use noodles_vcf as vcf; use thousands::Separable; @@ -33,15 +33,6 @@ pub struct Args { pub path_out: String, } -/// Return the version of the `varfish-server-worker` crate and `x.y.z` in tests. -fn worker_version() -> &'static str { - if cfg!(test) { - "x.y.z" - } else { - env!("CARGO_PKG_VERSION") - } -} - /// Return path component fo rth egiven assembly. pub fn path_component(genomebuild: GenomeRelease) -> &'static str { match genomebuild { diff --git a/src/strucvars/ingest/header.rs b/src/strucvars/ingest/header.rs new file mode 100644 index 00000000..b39683ab --- /dev/null +++ b/src/strucvars/ingest/header.rs @@ -0,0 +1,343 @@ +use std::collections::HashSet; + +use noodles_vcf as vcf; +use vcf::header::SampleNames; + +use crate::common::{add_contigs_37, add_contigs_38, GenomeRelease}; + +/// Return token for caller name. +fn caller_name(sv_caller: &mehari::annotate::strucvars::SvCaller) -> &'static str { + match sv_caller { + mehari::annotate::strucvars::SvCaller::Delly { .. } => "Delly", + mehari::annotate::strucvars::SvCaller::DragenSv { .. 
} => "DragenSv", + mehari::annotate::strucvars::SvCaller::DragenCnv { .. } => "DragenCnv", + mehari::annotate::strucvars::SvCaller::Gcnv { .. } => "Gcnv", + mehari::annotate::strucvars::SvCaller::Manta { .. } => "Manta", + mehari::annotate::strucvars::SvCaller::Melt { .. } => "Melt", + mehari::annotate::strucvars::SvCaller::Popdel { .. } => "Popdel", + } +} + +/// Return caller version. +fn caller_version(sv_caller: &mehari::annotate::strucvars::SvCaller) -> String { + match sv_caller { + mehari::annotate::strucvars::SvCaller::Delly { version } + | mehari::annotate::strucvars::SvCaller::DragenSv { version } + | mehari::annotate::strucvars::SvCaller::DragenCnv { version } + | mehari::annotate::strucvars::SvCaller::Gcnv { version } + | mehari::annotate::strucvars::SvCaller::Manta { version } + | mehari::annotate::strucvars::SvCaller::Melt { version } + | mehari::annotate::strucvars::SvCaller::Popdel { version } => version.clone(), + } +} + +/// Generate the output header from the input header. 
+pub fn build_output_header( + input_sample_names: &SampleNames, + input_sv_callers: &[&mehari::annotate::strucvars::SvCaller], + pedigree: &Option, + genomebuild: GenomeRelease, + worker_version: &str, +) -> Result { + use vcf::header::record::value::{ + map::{format, info, Filter, Format, Info}, + Map, + }; + use vcf::header::Number; + use vcf::record::genotypes::keys::key; + + let builder = vcf::Header::builder() + .add_filter("PASS", Map::::new("All filters passed")) + .add_info( + vcf::record::info::field::key::IS_IMPRECISE, + Map::::from(&vcf::record::info::field::key::IS_IMPRECISE), + ) + .add_info( + vcf::record::info::field::key::END_POSITION, + Map::::from(&vcf::record::info::field::key::END_POSITION), + ) + .add_info( + vcf::record::info::field::key::SV_TYPE, + Map::::from(&vcf::record::info::field::key::SV_TYPE), + ) + .add_info( + vcf::record::info::field::key::SV_LENGTHS, + Map::::from(&vcf::record::info::field::key::SV_LENGTHS), + ) + .add_info( + vcf::record::info::field::key::SV_CLAIM, + Map::::from(&vcf::record::info::field::key::SV_CLAIM), + ) + .add_info( + "chr2".parse()?, + Map::::new( + Number::Count(1), + info::Type::String, + "Second chromosome, if not equal to CHROM", + ), + ) + .add_info( + "annsv".parse()?, + Map::::new( + Number::Count(1), + info::Type::String, + "Effect annotations: 'Allele | Annotation | Gene_Name | Gene_ID'", + ), + ) + .add_format( + key::CONDITIONAL_GENOTYPE_QUALITY, + Map::::from(&key::CONDITIONAL_GENOTYPE_QUALITY), + ) + .add_format(key::GENOTYPE, Map::::from(&key::GENOTYPE)) + .add_format( + "pec".parse()?, + Map::::new( + Number::Count(1), + format::Type::Integer, + "Total coverage with paired-end reads", + ), + ) + .add_format( + "pev".parse()?, + Map::::new( + Number::Count(1), + format::Type::Integer, + "Paired-end reads supporting the variant", + ), + ) + .add_format( + "src".parse()?, + Map::::new( + Number::Count(1), + format::Type::Integer, + "Total coverage with split reads", + ), + ) + .add_format( + 
"srv".parse()?, + Map::::new( + Number::Count(1), + format::Type::Integer, + "Split reads supporting the variant", + ), + ) + .add_format( + "amq".parse()?, + Map::::new( + Number::Count(1), + format::Type::Float, + "Average mapping quality over the variant", + ), + ) + .add_format( + "cn".parse()?, + Map::::new( + Number::Count(1), + format::Type::Integer, + "Copy number of the variant in the sample", + ), + ) + .add_format( + "anc".parse()?, + Map::::new( + Number::Count(1), + format::Type::Float, + "Average normalized coverage over the variant in the sample", + ), + ) + .add_format( + "pc".parse()?, + Map::::new( + Number::Count(1), + format::Type::Integer, + "Point count (windows/targets/probes)", + ), + ); + + let mut builder = match genomebuild { + GenomeRelease::Grch37 => add_contigs_37(builder), + GenomeRelease::Grch38 => add_contigs_38(builder), + } + .map_err(|e| anyhow::anyhow!("problem adding contigs: {}", e))?; + + if let Some(pedigree) = pedigree { + let ped_idv = pedigree + .individuals + .iter() + .map(|(name, _)| name.clone()) + .collect::>(); + let input_idv = input_sample_names.iter().cloned().collect::>(); + if !ped_idv.eq(&input_idv) { + anyhow::bail!( + "pedigree individuals = {:?} != input individuals: {:?}", + &ped_idv, + &input_idv + ) + } + + for name in input_sample_names { + let i = pedigree + .individuals + .get(name) + .expect("checked equality above"); + if input_sample_names.contains(&i.name) { + builder = builder.add_sample_name(i.name.clone()); + } + + // Add SAMPLE entry. + builder = builder.insert( + "SAMPLE".parse()?, + noodles_vcf::header::record::Value::Map( + i.name.clone(), + Map::::builder() + .insert( + "Sex".parse()?, + mehari::annotate::strucvars::vcf_header::sex_str(i.sex), + ) + .insert( + "Disease".parse()?, + mehari::annotate::strucvars::vcf_header::disease_str(i.disease), + ) + .build()?, + ), + )?; + + // Add PEDIGREE entry. 
+ let mut map_builder = Map::::builder(); + if let Some(father) = i.father.as_ref() { + map_builder = map_builder.insert("Father".parse()?, father.clone()); + } + if let Some(mother) = i.mother.as_ref() { + map_builder = map_builder.insert("Mother".parse()?, mother.clone()); + } + builder = builder.insert( + "PEDIGREE".parse()?, + noodles_vcf::header::record::Value::Map(i.name.clone(), map_builder.build()?), + )?; + } + } else { + for name in input_sample_names { + builder = builder.add_sample_name(name.clone()); + } + } + + use vcf::header::record::value::map::Other; + + let mut builder = builder.insert( + "x-varfish-version".parse()?, + vcf::header::record::Value::Map( + String::from("varfish-server-worker"), + Map::::builder() + .insert("Version".parse()?, worker_version) + .build()?, + ), + )?; + + for sv_caller in input_sv_callers.iter() { + builder = builder.insert( + "x-varfish-version".parse()?, + vcf::header::record::Value::Map( + caller_name(sv_caller).into(), + Map::::builder() + .insert("Name".parse()?, caller_name(sv_caller)) + .insert("Version".parse()?, caller_version(sv_caller)) + .build()?, + ), + )?; + } + + Ok(builder.build()) +} + +#[cfg(test)] +mod test { + use mehari::ped::PedigreeByName; + use rstest::rstest; + + macro_rules! 
set_snapshot_suffix { + ($($expr:expr),*) => { + let mut settings = insta::Settings::clone_current(); + settings.set_snapshot_suffix(format!($($expr,)*)); + let _guard = settings.bind_to_scope(); + } + } + + #[rstest] + #[case("tests/db/strucvars/ingest/delly2-min.vcf")] + #[case("tests/db/strucvars/ingest/dragen-cnv-min.vcf")] + #[case("tests/db/strucvars/ingest/dragen-sv-min.vcf")] + #[case("tests/db/strucvars/ingest/gcnv-min.vcf")] + #[case("tests/db/strucvars/ingest/manta-min.vcf")] + #[case("tests/db/strucvars/ingest/melt-min.vcf")] + #[case("tests/db/strucvars/ingest/popdel-min.vcf")] + + fn build_output_header_37(#[case] path: &str) -> Result<(), anyhow::Error> { + set_snapshot_suffix!("{}", path.split('/').last().unwrap()); + let tmpdir = temp_testdir::TempDir::default(); + + let pedigree = PedigreeByName::from_path(path.replace(".vcf", ".ped")).unwrap(); + + let input_vcf_header = noodles_vcf::reader::Builder + .build_from_path(path)? + .read_header()?; + let sv_callers = vec![mehari::annotate::strucvars::guess_sv_caller(&path)?]; + let sv_caller_refs = sv_callers.iter().collect::>(); + let output_vcf_header = super::build_output_header( + &input_vcf_header.sample_names(), + &sv_caller_refs, + &Some(pedigree), + crate::common::GenomeRelease::Grch37, + "x.y.z", + )?; + + let out_path = tmpdir.join("out.vcf"); + let out_path_str = out_path.to_str().expect("invalid path"); + { + noodles_vcf::writer::Writer::new(std::fs::File::create(out_path_str)?) 
+ .write_header(&output_vcf_header)?; + } + + insta::assert_snapshot!(std::fs::read_to_string(out_path_str)?); + + Ok(()) + } + + #[rstest] + #[case("tests/db/strucvars/ingest/delly2-min.vcf")] + #[case("tests/db/strucvars/ingest/dragen-cnv-min.vcf")] + #[case("tests/db/strucvars/ingest/dragen-sv-min.vcf")] + #[case("tests/db/strucvars/ingest/gcnv-min.vcf")] + #[case("tests/db/strucvars/ingest/manta-min.vcf")] + #[case("tests/db/strucvars/ingest/melt-min.vcf")] + #[case("tests/db/strucvars/ingest/popdel-min.vcf")] + fn build_output_header_38(#[case] path: &str) -> Result<(), anyhow::Error> { + set_snapshot_suffix!("{}", path.split('/').last().unwrap()); + let tmpdir = temp_testdir::TempDir::default(); + + let pedigree = PedigreeByName::from_path(path.replace(".vcf", ".ped")).unwrap(); + + let input_vcf_header = noodles_vcf::reader::Builder + .build_from_path(path)? + .read_header()?; + let sv_callers = vec![mehari::annotate::strucvars::guess_sv_caller(&path)?]; + let sv_caller_refs = sv_callers.iter().collect::>(); + let output_vcf_header = super::build_output_header( + &input_vcf_header.sample_names(), + &sv_caller_refs, + &Some(pedigree), + crate::common::GenomeRelease::Grch38, + "x.y.z", + )?; + + let out_path = tmpdir.join("out.vcf"); + let out_path_str = out_path.to_str().expect("invalid path"); + { + noodles_vcf::writer::Writer::new(std::fs::File::create(out_path_str)?) + .write_header(&output_vcf_header)?; + } + + insta::assert_snapshot!(std::fs::read_to_string(out_path_str)?); + + Ok(()) + } +} diff --git a/src/strucvars/ingest/mod.rs b/src/strucvars/ingest/mod.rs new file mode 100644 index 00000000..d63c4356 --- /dev/null +++ b/src/strucvars/ingest/mod.rs @@ -0,0 +1,89 @@ +//! Implementation of `strucvars ingest` subcommand. 
+ +use crate::common::{self, open_read_maybe_gz, open_write_maybe_gz, worker_version, GenomeRelease}; + +use mehari::annotate::seqvars::provider::MehariProvider; +use noodles_vcf as vcf; +use thousands::Separable; + +pub mod header; + +/// Command line arguments for `strucvars ingest` subcommand. +#[derive(Debug, clap::Parser)] +#[command(author, version, about = "ingest structural variant VCF", long_about = None)] +pub struct Args { + /// Maximal number of variants to write out; optional. + #[clap(long)] + pub max_var_count: Option, + /// The path to the mehari database. + #[clap(long)] + pub path_mehari_db: String, + /// The assumed genome build. + #[clap(long)] + pub genomebuild: GenomeRelease, + /// Path to the pedigree file. + #[clap(long)] + pub path_ped: String, + /// Path to input file. + #[clap(long)] + pub path_in: String, + /// Path to output file. + #[clap(long)] + pub path_out: String, +} + +/// Main entry point for `strucvars ingest` sub command. +pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow::Error> { + let before_anything = std::time::Instant::now(); + tracing::info!("args_common = {:#?}", &args_common); + tracing::info!("args = {:#?}", &args); + + common::trace_rss_now(); + + tracing::info!("loading pedigree..."); + let pedigree = mehari::ped::PedigreeByName::from_path(&args.path_ped) + .map_err(|e| anyhow::anyhow!("problem parsing PED file: {}", e))?; + tracing::info!("pedigre = {:#?}", &pedigree); + + tracing::info!("opening input file..."); + let mut input_reader = { + vcf::reader::Builder + .build_from_reader(open_read_maybe_gz(&args.path_in)?) + .map_err(|e| anyhow::anyhow!("could not build VCF reader: {}", e))? 
+ }; + + tracing::info!("processing header..."); + let input_header = input_reader + .read_header() + .map_err(|e| anyhow::anyhow!("problem reading VCF header: {}", e))?; + let output_header = header::build_output_header( + input_header.sample_names(), + &vec![], + &Some(pedigree), + args.genomebuild, + worker_version(), + ) + .map_err(|e| anyhow::anyhow!("problem building output header: {}", e))?; + + let mut output_writer = { vcf::writer::Writer::new(open_write_maybe_gz(&args.path_out)?) }; + output_writer + .write_header(&output_header) + .map_err(|e| anyhow::anyhow!("problem writing header: {}", e))?; + + // process_variants( + // &mut output_writer, + // &mut input_reader, + // &output_header, + // &input_header, + // args, + // )?; + + tracing::info!( + "All of `strucvars ingest` completed in {:?}", + before_anything.elapsed() + ); + Ok(()) +} + +#[cfg(test)] +mod test {} diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap new file mode 100644 index 00000000..5f3a360a --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap @@ -0,0 +1,58 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##SAMPLE= +##SAMPLE= +##PEDIGREE= +##PEDIGREE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap new file mode 100644 index 00000000..18e81a17 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap @@ -0,0 +1,54 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap new file mode 100644 index 00000000..eb90e9d3 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap @@ -0,0 +1,54 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap new file mode 100644 index 00000000..17b62134 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap @@ -0,0 +1,54 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap new file mode 100644 index 00000000..7d6dc923 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap @@ -0,0 +1,54 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap new file mode 100644 index 00000000..b0e25e2b --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap @@ -0,0 +1,54 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap new file mode 100644 index 00000000..d54eeff0 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap @@ -0,0 +1,58 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##SAMPLE= +##SAMPLE= +##PEDIGREE= +##PEDIGREE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap new file mode 100644 index 00000000..903f2959 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap @@ -0,0 +1,58 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##SAMPLE= +##SAMPLE= +##PEDIGREE= +##PEDIGREE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap new file mode 100644 index 00000000..c4a97f25 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap @@ -0,0 +1,54 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap new file mode 100644 index 00000000..c95b5cb6 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap @@ -0,0 +1,54 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap new file mode 100644 index 00000000..ed1dcf2c --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap @@ -0,0 +1,54 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap new file mode 100644 index 00000000..68d0312d --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap @@ -0,0 +1,54 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap new file mode 100644 index 00000000..7f41b458 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap @@ -0,0 +1,54 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap new file mode 100644 index 00000000..a0c6692b --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap @@ -0,0 +1,58 @@ +--- +source: src/strucvars/ingest/header.rs +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##SAMPLE= +##SAMPLE= +##SAMPLE= +##PEDIGREE= +##PEDIGREE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother + diff --git a/src/strucvars/mod.rs b/src/strucvars/mod.rs new file mode 100644 index 00000000..c4205066 --- /dev/null +++ b/src/strucvars/mod.rs @@ -0,0 +1,2 @@ +pub mod ingest; +pub mod query; diff --git a/src/sv/query/bgdbs.rs b/src/strucvars/query/bgdbs.rs similarity index 100% rename from src/sv/query/bgdbs.rs rename to src/strucvars/query/bgdbs.rs diff --git a/src/sv/query/clinvar.rs b/src/strucvars/query/clinvar.rs similarity index 100% rename from src/sv/query/clinvar.rs rename to src/strucvars/query/clinvar.rs diff --git a/src/sv/query/genes.rs b/src/strucvars/query/genes.rs similarity index 100% rename from src/sv/query/genes.rs rename to src/strucvars/query/genes.rs diff --git a/src/sv/query/interpreter.rs b/src/strucvars/query/interpreter.rs similarity index 99% rename from src/sv/query/interpreter.rs rename to src/strucvars/query/interpreter.rs index 827474c5..cb6df271 100644 --- a/src/sv/query/interpreter.rs +++ b/src/strucvars/query/interpreter.rs @@ -380,7 +380,7 @@ mod tests { use crate::{ db::conf::Database, - sv::query::schema::{ + strucvars::query::schema::{ CallInfo, GenomicRegion, GenotypeChoice, GenotypeCriteria, StrandOrientation, }, }; diff --git a/src/sv/query/masked.rs b/src/strucvars/query/masked.rs similarity index 96% rename from src/sv/query/masked.rs rename to 
src/strucvars/query/masked.rs index b3546a3a..51ef7855 100644 --- a/src/sv/query/masked.rs +++ b/src/strucvars/query/masked.rs @@ -293,14 +293,16 @@ mod test { masked_db: super::MaskedDb, chrom_map: indexmap::IndexMap, ) { - let sv = crate::sv::query::schema::StructuralVariant { + let sv = crate::strucvars::query::schema::StructuralVariant { chrom: String::from("1"), pos: sv_pos, end: sv_end, chrom2: None, - sv_type: crate::sv::query::schema::SvType::Del, - sv_sub_type: crate::sv::query::schema::SvSubType::Del, - strand_orientation: Some(crate::sv::query::schema::StrandOrientation::FiveToThree), + sv_type: crate::strucvars::query::schema::SvType::Del, + sv_sub_type: crate::strucvars::query::schema::SvSubType::Del, + strand_orientation: Some( + crate::strucvars::query::schema::StrandOrientation::FiveToThree, + ), call_info: Default::default(), }; diff --git a/src/sv/query/mod.rs b/src/strucvars/query/mod.rs similarity index 99% rename from src/sv/query/mod.rs rename to src/strucvars/query/mod.rs index fe6fa710..10e8ad8a 100644 --- a/src/sv/query/mod.rs +++ b/src/strucvars/query/mod.rs @@ -32,7 +32,7 @@ use uuid::Uuid; use crate::{ common::{build_chrom_map, numeric_gene_id, open_read_maybe_gz, trace_rss_now}, db::conf::{Database, GenomeRelease, TadSet as TadSetChoice}, - sv::query::{ + strucvars::query::{ interpreter::QueryInterpreter, pathogenic::Record as KnownPathogenicRecord, records::StructuralVariant as RecordSv, schema::CaseQuery, schema::StructuralVariant as SchemaSv, diff --git a/src/sv/query/pathogenic.rs b/src/strucvars/query/pathogenic.rs similarity index 100% rename from src/sv/query/pathogenic.rs rename to src/strucvars/query/pathogenic.rs diff --git a/src/sv/query/records.rs b/src/strucvars/query/records.rs similarity index 100% rename from src/sv/query/records.rs rename to src/strucvars/query/records.rs diff --git a/src/sv/query/schema.rs b/src/strucvars/query/schema.rs similarity index 100% rename from src/sv/query/schema.rs rename to 
src/strucvars/query/schema.rs diff --git a/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__masked__test__load_masked_db_records.snap b/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__masked__test__load_masked_db_records.snap new file mode 100644 index 00000000..50f45c46 --- /dev/null +++ b/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__masked__test__load_masked_db_records.snap @@ -0,0 +1,113 @@ +--- +source: src/strucvars/query/masked.rs +expression: result +--- +records: + - - begin: 0 + end: 2 + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] + - [] +trees: + - entries: + - data: 0 + interval: + start: 0 + end: 2 + max: 2 + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + - entries: [] + max_level: 0 + indexed: true + diff --git 
a/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__masked__test__masked_db_fetch_records.snap b/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__masked__test__masked_db_fetch_records.snap new file mode 100644 index 00000000..55869bac --- /dev/null +++ b/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__masked__test__masked_db_fetch_records.snap @@ -0,0 +1,13 @@ +--- +source: src/strucvars/query/masked.rs +expression: result +--- +left: + - begin: 0 + end: 10 + - begin: 5 + end: 15 +right: + - begin: 100 + end: 110 + diff --git a/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__call_info_serde_smoke.snap b/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__call_info_serde_smoke.snap new file mode 100644 index 00000000..21922035 --- /dev/null +++ b/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__call_info_serde_smoke.snap @@ -0,0 +1,18 @@ +--- +source: src/strucvars/query/schema.rs +expression: "serde_json::to_string_pretty(&info).unwrap()" +--- +{ + "genotype": "0/1", + "effective_genotype": null, + "matched_gt_criteria": null, + "quality": 10.0, + "paired_end_cov": 10, + "paired_end_var": 10, + "split_read_cov": 10, + "split_read_var": 10, + "copy_number": 1, + "average_normalized_cov": 0.491, + "point_count": 5, + "average_mapping_quality": 60.0 +} diff --git a/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__case_query_serde_smoke.snap b/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__case_query_serde_smoke.snap new file mode 100644 index 00000000..38e6fe1f --- /dev/null +++ b/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__case_query_serde_smoke.snap @@ -0,0 +1,77 @@ +--- +source: src/strucvars/query/schema.rs +expression: "serde_json::to_string_pretty(&query).unwrap()" +--- +{ + 
"database": "refseq", + "svdb_dgv_enabled": false, + "svdb_dgv_min_overlap": null, + "svdb_dgv_max_count": null, + "svdb_dgv_gs_enabled": false, + "svdb_dgv_gs_min_overlap": null, + "svdb_dgv_gs_max_count": null, + "svdb_gnomad_enabled": false, + "svdb_gnomad_min_overlap": null, + "svdb_gnomad_max_count": null, + "svdb_exac_enabled": false, + "svdb_exac_min_overlap": null, + "svdb_exac_max_count": null, + "svdb_dbvar_enabled": false, + "svdb_dbvar_min_overlap": null, + "svdb_dbvar_max_count": null, + "svdb_g1k_enabled": false, + "svdb_g1k_min_overlap": null, + "svdb_g1k_max_count": null, + "svdb_inhouse_enabled": false, + "svdb_inhouse_min_overlap": null, + "svdb_inhouse_max_count": null, + "clinvar_sv_min_overlap": null, + "clinvar_sv_min_pathogenicity": null, + "sv_size_min": null, + "sv_size_max": null, + "sv_types": [ + "DEL", + "DUP", + "INV", + "INS", + "BND", + "CNV" + ], + "sv_sub_types": [ + "DEL", + "DEL:ME", + "DEL:ME:SVA", + "DEL:ME:L1", + "DEL:ME:ALU", + "DUP", + "DUP:TANDEM", + "INV", + "INS", + "INS:ME", + "INS:ME:SVA", + "INS:ME:L1", + "INS:ME:ALU", + "BND", + "CNV" + ], + "tx_effects": [ + "transcript_variant", + "exon_variant", + "splice_region_variant", + "intron_variant", + "upstream_variant", + "downstream_variant", + "intergenic_variant" + ], + "gene_allowlist": null, + "genomic_region": null, + "regulatory_overlap": 100, + "regulatory_ensembl_features": null, + "regulatory_vista_validation": null, + "regulatory_custom_configs": [], + "tad_set": null, + "genotype": {}, + "genotype_criteria": [], + "recessive_mode": null, + "recessive_index": null +} diff --git a/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__genotype_criteria_serde_smoke.snap b/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__genotype_criteria_serde_smoke.snap new file mode 100644 index 00000000..4416248e --- /dev/null +++ 
b/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__genotype_criteria_serde_smoke.snap @@ -0,0 +1,51 @@ +--- +source: src/strucvars/query/schema.rs +expression: "serde_json::to_string_pretty(&crit).unwrap()" +--- +{ + "genotype": "het", + "select_sv_sub_type": [], + "select_sv_min_size": null, + "select_sv_max_size": null, + "max_brk_segdup": null, + "max_brk_repeat": null, + "max_brk_segduprepeat": null, + "gt_one_of": null, + "min_gq": null, + "min_pr_cov": null, + "max_pr_cov": null, + "min_pr_ref": null, + "max_pr_ref": null, + "min_pr_var": null, + "max_pr_var": null, + "min_pr_ab": null, + "max_pr_ab": null, + "min_sr_cov": null, + "max_sr_cov": null, + "min_sr_ref": null, + "max_sr_ref": null, + "min_sr_var": null, + "max_sr_var": null, + "min_sr_ab": null, + "max_sr_ab": null, + "min_srpr_cov": null, + "max_srpr_cov": null, + "min_srpr_ref": null, + "max_srpr_ref": null, + "min_srpr_var": null, + "max_srpr_var": null, + "min_srpr_ab": null, + "max_srpr_ab": null, + "min_rd_dev": null, + "max_rd_dev": null, + "min_amq": null, + "max_amq": null, + "missing_gt_ok": true, + "missing_gq_ok": true, + "missing_pr_ok": true, + "missing_sr_ok": true, + "missing_srpr_ok": true, + "missing_rd_dev_ok": true, + "missing_amq_ok": true, + "comment": null +} diff --git a/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__structural_variant_serde_smoke.snap b/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__structural_variant_serde_smoke.snap new file mode 100644 index 00000000..c6015179 --- /dev/null +++ b/src/strucvars/query/snapshots/varfish_server_worker__strucvars__query__schema__tests__structural_variant_serde_smoke.snap @@ -0,0 +1,14 @@ +--- +source: src/strucvars/query/schema.rs +expression: "serde_json::to_string_pretty(&sv).unwrap()" +--- +{ + "chrom": "chr1", + "pos": 123, + "sv_type": "DEL", + "sv_sub_type": "DEL:ME:L1", + "chrom2": null, + "end": 245, + 
"strand_orientation": "3to5", + "call_info": {} +} diff --git a/src/sv/query/snapshots/varfish_server_worker__sv__query__masked__test__load_masked_db_records.snap b/src/strucvars/query/snapshots/varfish_server_worker__sv__query__masked__test__load_masked_db_records.snap similarity index 100% rename from src/sv/query/snapshots/varfish_server_worker__sv__query__masked__test__load_masked_db_records.snap rename to src/strucvars/query/snapshots/varfish_server_worker__sv__query__masked__test__load_masked_db_records.snap diff --git a/src/sv/query/snapshots/varfish_server_worker__sv__query__masked__test__masked_db_fetch_records.snap b/src/strucvars/query/snapshots/varfish_server_worker__sv__query__masked__test__masked_db_fetch_records.snap similarity index 100% rename from src/sv/query/snapshots/varfish_server_worker__sv__query__masked__test__masked_db_fetch_records.snap rename to src/strucvars/query/snapshots/varfish_server_worker__sv__query__masked__test__masked_db_fetch_records.snap diff --git a/src/sv/query/snapshots/varfish_server_worker__sv__query__schema__tests__call_info_serde_smoke.snap b/src/strucvars/query/snapshots/varfish_server_worker__sv__query__schema__tests__call_info_serde_smoke.snap similarity index 100% rename from src/sv/query/snapshots/varfish_server_worker__sv__query__schema__tests__call_info_serde_smoke.snap rename to src/strucvars/query/snapshots/varfish_server_worker__sv__query__schema__tests__call_info_serde_smoke.snap diff --git a/src/sv/query/snapshots/varfish_server_worker__sv__query__schema__tests__case_query_serde_smoke.snap b/src/strucvars/query/snapshots/varfish_server_worker__sv__query__schema__tests__case_query_serde_smoke.snap similarity index 100% rename from src/sv/query/snapshots/varfish_server_worker__sv__query__schema__tests__case_query_serde_smoke.snap rename to src/strucvars/query/snapshots/varfish_server_worker__sv__query__schema__tests__case_query_serde_smoke.snap diff --git 
a/src/sv/query/snapshots/varfish_server_worker__sv__query__schema__tests__genotype_criteria_serde_smoke.snap b/src/strucvars/query/snapshots/varfish_server_worker__sv__query__schema__tests__genotype_criteria_serde_smoke.snap similarity index 100% rename from src/sv/query/snapshots/varfish_server_worker__sv__query__schema__tests__genotype_criteria_serde_smoke.snap rename to src/strucvars/query/snapshots/varfish_server_worker__sv__query__schema__tests__genotype_criteria_serde_smoke.snap diff --git a/src/sv/query/snapshots/varfish_server_worker__sv__query__schema__tests__structural_variant_serde_smoke.snap b/src/strucvars/query/snapshots/varfish_server_worker__sv__query__schema__tests__structural_variant_serde_smoke.snap similarity index 100% rename from src/sv/query/snapshots/varfish_server_worker__sv__query__schema__tests__structural_variant_serde_smoke.snap rename to src/strucvars/query/snapshots/varfish_server_worker__sv__query__schema__tests__structural_variant_serde_smoke.snap diff --git a/src/sv/query/tads.rs b/src/strucvars/query/tads.rs similarity index 100% rename from src/sv/query/tads.rs rename to src/strucvars/query/tads.rs diff --git a/src/sv/mod.rs b/src/sv/mod.rs deleted file mode 100644 index 67350db2..00000000 --- a/src/sv/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod query; diff --git a/tests/db/strucvars/ingest/delly2-min.ped b/tests/db/strucvars/ingest/delly2-min.ped new file mode 100644 index 00000000..44eea94d --- /dev/null +++ b/tests/db/strucvars/ingest/delly2-min.ped @@ -0,0 +1,3 @@ +FAM index father mother 1 2 +FAM father 0 0 1 1 +FAM mother 0 0 2 1 diff --git a/tests/db/strucvars/ingest/delly2-min.vcf b/tests/db/strucvars/ingest/delly2-min.vcf new file mode 100644 index 00000000..b6588dd0 --- /dev/null +++ b/tests/db/strucvars/ingest/delly2-min.vcf @@ -0,0 +1,132 @@ +##fileformat=VCFv4.2 +##FILTER= +##fileDate=20220829 +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##FILTER= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= 
+##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##reference=/fast/projects/cubit/20.05/static_data/reference/GRCh37/hs37d5/hs37d5.fa +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##bcftools_viewVersion=1.15.1+htslib-1.15.1 +##bcftools_viewCommand=view -O z -o work/bwa.delly2.SAMPLE-N1-DNA1-WGS1/out/bwa.delly2.SAMPLE-N1-DNA1-WGS1.vcf.gz /data/gpfs-1/users/holtgrem_c/scratch/tmp/hpc-cpu-164/20220829/tmp.jo3WnHhrQp/cwd/1.bcf; Date=Mon Aug 29 16:04:36 2022 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother +1 586412 DEL00000004 C 92 PASS PRECISE;SVTYPE=DEL;SVMETHOD=EMBL.DELLYv1.1.3;END=586439;PE=0;MAPQ=0;CT=3to5;CIPOS=-20,20;CIEND=-20,20;SRMAPQ=23;INSLEN=0;HOMLEN=20;SR=4;SRQ=1;CONSENSUS=CTCAGGGTGTTCGGGATAAAGAAGACTCAGGAAGACAAGTATGAAGCATAATCTGTGACATTATTGATATCTTCCTGAAGAACATAATTCCTGCCTACCATCAACAAGCATCAATACTTTCTACCAGCTATTCTCAACCCTCATCATCGGAAGAGACAGACACTGACTGTGTCAAA;CE=1.96018;AC=3;AN=6 
GT:GL:GQ:FT:RCL:RC:RCR:RDCN:DR:DV:RR:RV 0/1:-5.90527,0,-14.2974:59:PASS:203:373:203:2:0:0:7:4 0/1:-2.23535,0,-11.3961:22:PASS:188:321:130:2:0:0:6:2 0/1:-1.0313,0,-19.791:10:LowQual:327:515:247:2:0:0:11:2 diff --git a/tests/db/strucvars/ingest/dragen-cnv-min.ped b/tests/db/strucvars/ingest/dragen-cnv-min.ped new file mode 100644 index 00000000..de863440 --- /dev/null +++ b/tests/db/strucvars/ingest/dragen-cnv-min.ped @@ -0,0 +1 @@ +FAM SAMPLE 0 0 1 2 diff --git a/tests/db/strucvars/ingest/dragen-cnv-min.vcf b/tests/db/strucvars/ingest/dragen-cnv-min.vcf new file mode 100644 index 00000000..046597a9 --- /dev/null +++ b/tests/db/strucvars/ingest/dragen-cnv-min.vcf @@ -0,0 +1,48 @@ +##fileformat=VCFv4.2 +##DRAGENVersion= +##DRAGENCommandLine= +##reference=file:///staging/human/reference/hs37d5/hs37d5.fa.k_21.f_16.m_149 +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##ALT= +##ALT= +##ALT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##CoverageUniformity=0.333610 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 1283844 DRAGEN:LOSS:1:1283845-1284844 N 20 cnvLength SVLEN=-1000;SVTYPE=CNV;END=1284844;REFLEN=1000 GT:SM:CN:BC:PE 0/1:0.321909:1:1:1,1 diff --git a/tests/db/strucvars/ingest/dragen-sv-min.ped b/tests/db/strucvars/ingest/dragen-sv-min.ped new file mode 100644 index 00000000..de863440 --- /dev/null +++ b/tests/db/strucvars/ingest/dragen-sv-min.ped @@ -0,0 +1 @@ +FAM SAMPLE 0 0 1 2 diff --git a/tests/db/strucvars/ingest/dragen-sv-min.vcf b/tests/db/strucvars/ingest/dragen-sv-min.vcf new file mode 100644 index 00000000..43d239f8 --- /dev/null +++ b/tests/db/strucvars/ingest/dragen-sv-min.vcf @@ -0,0 +1,133 @@ +##fileformat=VCFv4.1 
+##fileDate=20220321 +##source=DRAGEN 07.021.624.3.10.4 +##reference=file:///staging/human/reference/hs37d5/hs37d5.fa.k_21.f_16.m_149 +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT=0.999"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##ALT= +##ALT= +##ALT= +##cmdline=dragen --ref-dir /staging/human/reference/hs37d5/hs37d5.fa.k_21.f_16.m_149 --fastq-file1 /mnt/smb01-hum/NGSRawData/220318_A01077_0174_AH7JGVDMXY/Data/Intensities/BaseCalls/NA-12878WGS-Genom-size_S1_R1_001.fastq.gz --fastq-file2 /mnt/smb01-hum/NGSRawData/220318_A01077_0174_AH7JGVDMXY/Data/Intensities/BaseCalls/NA-12878WGS-Genom-size_S1_R2_001.fastq.gz --output-directory /staging/output/220318_A01077_0174_AH7JGVDMXY/NA-12878WGSWGS/ --output-file-prefix NA-12878WGSWGS_dragen --RGID WGS --RGSM NA-12878WGSWGS 
--num-threads 46 --enable-map-align true --enable-map-align-output true --enable-duplicate-marking true --enable-variant-caller true --qc-cross-cont-vcf /opt/edico/config/sample_cross_contamination_resource_GRCh37.vcf.gz --enable-cnv true --cnv-enable-self-normalization true --enable-sv true --qc-coverage-region-1 /staging/human/bed/CDS-v19-ROIs_v2.bed --qc-coverage-reports-1 cov_report full_res --qc-coverage-region-2 /staging/human/bed/Regions_Exomev8.bed --qc-coverage-reports-2 cov_report full_res --qc-coverage-region-3 /staging/human/bed/Padded_Exomev8.bed --qc-coverage-reports-3 cov_report full_res +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 1598413 MantaDEL:19042:0:0:0:0:0 ACACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT A 762 PASS END=1598580;SVTYPE=DEL;SVLEN=-167;CIGAR=1M167D;CIPOS=0,18;HOMLEN=18;HOMSEQ=CACGCCTGTAATCCCAGC GT:FT:GQ:PL:PR:SR 1/1:PASS:53:815,56,0:0,2:0,20 diff --git a/tests/db/strucvars/ingest/gcnv-min.ped b/tests/db/strucvars/ingest/gcnv-min.ped new file mode 100644 index 00000000..de863440 --- /dev/null +++ b/tests/db/strucvars/ingest/gcnv-min.ped @@ -0,0 +1 @@ +FAM SAMPLE 0 0 1 2 diff --git a/tests/db/strucvars/ingest/gcnv-min.vcf b/tests/db/strucvars/ingest/gcnv-min.vcf new file mode 100644 index 00000000..f9239f45 --- /dev/null +++ b/tests/db/strucvars/ingest/gcnv-min.vcf @@ -0,0 +1,112 @@ +##fileformat=VCFv4.2 +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##GATKCommandLine= +##GATKCommandLine= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= 
+##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##gcnvVcfSchemaVersion=2.0 +##source=JointGermlineCNVSegmentation +##source=PostprocessGermlineCNVCalls +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 4124001 . N . . END=4125000;SVTYPE=DEL;SVLEN=-999;AC=1;AN=1 GT:CN:NP:QA:QS:QSE:QSS 1:1:1:107:107:107:107 diff --git a/tests/db/strucvars/ingest/manta-min.ped b/tests/db/strucvars/ingest/manta-min.ped new file mode 100644 index 00000000..de863440 --- /dev/null +++ b/tests/db/strucvars/ingest/manta-min.ped @@ -0,0 +1 @@ +FAM SAMPLE 0 0 1 2 diff --git a/tests/db/strucvars/ingest/manta-min.vcf b/tests/db/strucvars/ingest/manta-min.vcf new file mode 100644 index 00000000..bcc9fdea --- /dev/null +++ b/tests/db/strucvars/ingest/manta-min.vcf @@ -0,0 +1,128 @@ +##fileformat=VCFv4.1 +##fileDate=20220907 +##source=GenerateSVCandidates 1.6.0 +##reference=file:///fast/projects/cubit/20.05/static_data/reference/GRCh37/hs37d5/hs37d5.fa +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= 
+##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT=0.999"> +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##ALT= +##ALT= +##ALT= +##cmdline=configManta.py --referenceFasta /fast/projects/cubit/20.05/static_data/reference/GRCh37/hs37d5/hs37d5.fa --runDir work/bwa.manta.SAMPLE-N1-DNA1-WGS1/work --bam ngs_mapping/output/bwa.SAMPLE-N1-DNA1-WGS1/out/bwa.SAMPLE-N1-DNA1-WGS1.bam +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 1598413 MantaDEL:19042:0:0:0:0:0 ACACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT A 762 PASS END=1598580;SVTYPE=DEL;SVLEN=-167;CIGAR=1M167D;CIPOS=0,18;HOMLEN=18;HOMSEQ=CACGCCTGTAATCCCAGC GT:FT:GQ:PL:PR:SR 1/1:PASS:53:815,56,0:0,2:0,20 diff --git a/tests/db/strucvars/ingest/melt-min.ped b/tests/db/strucvars/ingest/melt-min.ped new file mode 100644 index 00000000..de863440 --- /dev/null +++ b/tests/db/strucvars/ingest/melt-min.ped @@ -0,0 +1 @@ +FAM SAMPLE 0 0 1 2 diff --git a/tests/db/strucvars/ingest/melt-min.vcf b/tests/db/strucvars/ingest/melt-min.vcf new file mode 100644 index 00000000..f0338686 --- /dev/null +++ b/tests/db/strucvars/ingest/melt-min.vcf @@ -0,0 +1,55 @@ +##fileformat=VCFv4.2 +##FILTER= 
+##fileDate=Jan 10, 2023, 7:53:31 AM +##source=MELTv2.2.2 +##reference=hs37d5.fa +##ALT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##ALT= +##INFO= +##ALT= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 1000 . T . PASS TSD=null;ASSESS=0;INTERNAL=null,null;SVTYPE=ALU;SVLEN=279;MEINFO=AluYa5,1,280,+;DIFF=0.53:t89c,g90a,c96a,g145a,c174t,g237c;LP=2;RP=1;RA=1;PRIOR=false;SR=0 GT:GL:DP:AD 1/1:-30,-1.81,-0:3:3 +1 2000 . C . lc;ac0 TSD=null;ASSESS=2;INTERNAL=null,null;SVTYPE=SVA;SVLEN=1240;MEINFO=SVA,75,1315,-;DIFF=0.1:n1-74,a78c,c126t,c158t,a167g,g192c,t193c,n201-1316;LP=6;RP=1;RA=2.585;PRIOR=false;SR=0 GT:GL:DP:AD 0/0:-0.6,-4.82,-84.6:8:1 +1 3000 . A . 
ac0 TSD=null;ASSESS=3;INTERNAL=NM_014813,INTRONIC;SVTYPE=LINE1;SVLEN=121;MEINFO=L1Ambig,5897,6018,+;DIFF=0.01:n1-5896,t5901c,a5929g,c5930a,a5931g,a5989t,n5996-6019;LP=10;RP=1;RA=3.322;ISTP=0;PRIOR=false;SR=1 GT:GL:DP:AD 0/0:-0.6,-28.3,-545.8:47:1 diff --git a/tests/db/strucvars/ingest/popdel-min.ped b/tests/db/strucvars/ingest/popdel-min.ped new file mode 100644 index 00000000..44eea94d --- /dev/null +++ b/tests/db/strucvars/ingest/popdel-min.ped @@ -0,0 +1,3 @@ +FAM index father mother 1 2 +FAM father 0 0 1 1 +FAM mother 0 0 2 1 diff --git a/tests/db/strucvars/ingest/popdel-min.vcf b/tests/db/strucvars/ingest/popdel-min.vcf new file mode 100644 index 00000000..a7cea57b --- /dev/null +++ b/tests/db/strucvars/ingest/popdel-min.vcf @@ -0,0 +1,121 @@ +##fileformat=VCFv4.3 +##FILTER= +##fileDate=[20230123] +##source=PopDel-V1.1.2 +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= 
+##bcftools_annotateVersion=1.16+htslib-1.16 +##bcftools_annotateCommand=annotate --header-lines /data/cephfs-1/scratch/groups/cubi/holtgrem_c/tmp/hpc-cpu-56/20230123/tmp.mQMOoq0Xvg/header.txt /data/cephfs-1/scratch/groups/cubi/holtgrem_c/tmp/hpc-cpu-56/20230123/tmp.mQMOoq0Xvg/tmp.vcf; Date=Mon Jan 23 14:28:42 2023 +##bcftools_concatVersion=1.16+htslib-1.16 +##bcftools_concatCommand=concat work/bwa_mem2.popdel_call.1-1-10020000/out/bwa_mem2.popdel_call.1-1-10020000.vcf.gz work/bwa_mem2.popdel_call.1-9980000-20020000/out/bwa_mem2.popdel_call.1-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.1-19980000-30020000/out/bwa_mem2.popdel_call.1-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.1-29980000-40020000/out/bwa_mem2.popdel_call.1-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.1-39980000-50020000/out/bwa_mem2.popdel_call.1-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.1-49980000-60020000/out/bwa_mem2.popdel_call.1-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.1-59980000-70020000/out/bwa_mem2.popdel_call.1-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.1-69980000-80020000/out/bwa_mem2.popdel_call.1-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.1-79980000-90020000/out/bwa_mem2.popdel_call.1-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.1-89980000-100020000/out/bwa_mem2.popdel_call.1-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.1-99980000-110020000/out/bwa_mem2.popdel_call.1-99980000-110020000.vcf.gz work/bwa_mem2.popdel_call.1-109980000-120020000/out/bwa_mem2.popdel_call.1-109980000-120020000.vcf.gz work/bwa_mem2.popdel_call.1-119980000-130020000/out/bwa_mem2.popdel_call.1-119980000-130020000.vcf.gz work/bwa_mem2.popdel_call.1-129980000-140020000/out/bwa_mem2.popdel_call.1-129980000-140020000.vcf.gz work/bwa_mem2.popdel_call.1-139980000-150020000/out/bwa_mem2.popdel_call.1-139980000-150020000.vcf.gz work/bwa_mem2.popdel_call.1-149980000-160020000/out/bwa_mem2.popdel_call.1-149980000-160020000.vcf.gz 
work/bwa_mem2.popdel_call.1-159980000-170020000/out/bwa_mem2.popdel_call.1-159980000-170020000.vcf.gz work/bwa_mem2.popdel_call.1-169980000-180020000/out/bwa_mem2.popdel_call.1-169980000-180020000.vcf.gz work/bwa_mem2.popdel_call.1-179980000-190020000/out/bwa_mem2.popdel_call.1-179980000-190020000.vcf.gz work/bwa_mem2.popdel_call.1-189980000-200020000/out/bwa_mem2.popdel_call.1-189980000-200020000.vcf.gz work/bwa_mem2.popdel_call.1-199980000-210020000/out/bwa_mem2.popdel_call.1-199980000-210020000.vcf.gz work/bwa_mem2.popdel_call.1-209980000-220020000/out/bwa_mem2.popdel_call.1-209980000-220020000.vcf.gz work/bwa_mem2.popdel_call.1-219980000-230020000/out/bwa_mem2.popdel_call.1-219980000-230020000.vcf.gz work/bwa_mem2.popdel_call.1-229980000-240020000/out/bwa_mem2.popdel_call.1-229980000-240020000.vcf.gz work/bwa_mem2.popdel_call.1-239980000-249250621/out/bwa_mem2.popdel_call.1-239980000-249250621.vcf.gz work/bwa_mem2.popdel_call.2-1-10020000/out/bwa_mem2.popdel_call.2-1-10020000.vcf.gz work/bwa_mem2.popdel_call.2-9980000-20020000/out/bwa_mem2.popdel_call.2-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.2-19980000-30020000/out/bwa_mem2.popdel_call.2-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.2-29980000-40020000/out/bwa_mem2.popdel_call.2-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.2-39980000-50020000/out/bwa_mem2.popdel_call.2-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.2-49980000-60020000/out/bwa_mem2.popdel_call.2-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.2-59980000-70020000/out/bwa_mem2.popdel_call.2-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.2-69980000-80020000/out/bwa_mem2.popdel_call.2-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.2-79980000-90020000/out/bwa_mem2.popdel_call.2-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.2-89980000-100020000/out/bwa_mem2.popdel_call.2-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.2-99980000-110020000/out/bwa_mem2.popdel_call.2-99980000-110020000.vcf.gz 
work/bwa_mem2.popdel_call.2-109980000-120020000/out/bwa_mem2.popdel_call.2-109980000-120020000.vcf.gz work/bwa_mem2.popdel_call.2-119980000-130020000/out/bwa_mem2.popdel_call.2-119980000-130020000.vcf.gz work/bwa_mem2.popdel_call.2-129980000-140020000/out/bwa_mem2.popdel_call.2-129980000-140020000.vcf.gz work/bwa_mem2.popdel_call.2-139980000-150020000/out/bwa_mem2.popdel_call.2-139980000-150020000.vcf.gz work/bwa_mem2.popdel_call.2-149980000-160020000/out/bwa_mem2.popdel_call.2-149980000-160020000.vcf.gz work/bwa_mem2.popdel_call.2-159980000-170020000/out/bwa_mem2.popdel_call.2-159980000-170020000.vcf.gz work/bwa_mem2.popdel_call.2-169980000-180020000/out/bwa_mem2.popdel_call.2-169980000-180020000.vcf.gz work/bwa_mem2.popdel_call.2-179980000-190020000/out/bwa_mem2.popdel_call.2-179980000-190020000.vcf.gz work/bwa_mem2.popdel_call.2-189980000-200020000/out/bwa_mem2.popdel_call.2-189980000-200020000.vcf.gz work/bwa_mem2.popdel_call.2-199980000-210020000/out/bwa_mem2.popdel_call.2-199980000-210020000.vcf.gz work/bwa_mem2.popdel_call.2-209980000-220020000/out/bwa_mem2.popdel_call.2-209980000-220020000.vcf.gz work/bwa_mem2.popdel_call.2-219980000-230020000/out/bwa_mem2.popdel_call.2-219980000-230020000.vcf.gz work/bwa_mem2.popdel_call.2-229980000-240020000/out/bwa_mem2.popdel_call.2-229980000-240020000.vcf.gz work/bwa_mem2.popdel_call.2-239980000-243199373/out/bwa_mem2.popdel_call.2-239980000-243199373.vcf.gz work/bwa_mem2.popdel_call.3-1-10020000/out/bwa_mem2.popdel_call.3-1-10020000.vcf.gz work/bwa_mem2.popdel_call.3-9980000-20020000/out/bwa_mem2.popdel_call.3-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.3-19980000-30020000/out/bwa_mem2.popdel_call.3-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.3-29980000-40020000/out/bwa_mem2.popdel_call.3-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.3-39980000-50020000/out/bwa_mem2.popdel_call.3-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.3-49980000-60020000/out/bwa_mem2.popdel_call.3-49980000-60020000.vcf.gz 
work/bwa_mem2.popdel_call.3-59980000-70020000/out/bwa_mem2.popdel_call.3-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.3-69980000-80020000/out/bwa_mem2.popdel_call.3-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.3-79980000-90020000/out/bwa_mem2.popdel_call.3-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.3-89980000-100020000/out/bwa_mem2.popdel_call.3-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.3-99980000-110020000/out/bwa_mem2.popdel_call.3-99980000-110020000.vcf.gz work/bwa_mem2.popdel_call.3-109980000-120020000/out/bwa_mem2.popdel_call.3-109980000-120020000.vcf.gz work/bwa_mem2.popdel_call.3-119980000-130020000/out/bwa_mem2.popdel_call.3-119980000-130020000.vcf.gz work/bwa_mem2.popdel_call.3-129980000-140020000/out/bwa_mem2.popdel_call.3-129980000-140020000.vcf.gz work/bwa_mem2.popdel_call.3-139980000-150020000/out/bwa_mem2.popdel_call.3-139980000-150020000.vcf.gz work/bwa_mem2.popdel_call.3-149980000-160020000/out/bwa_mem2.popdel_call.3-149980000-160020000.vcf.gz work/bwa_mem2.popdel_call.3-159980000-170020000/out/bwa_mem2.popdel_call.3-159980000-170020000.vcf.gz work/bwa_mem2.popdel_call.3-169980000-180020000/out/bwa_mem2.popdel_call.3-169980000-180020000.vcf.gz work/bwa_mem2.popdel_call.3-179980000-190020000/out/bwa_mem2.popdel_call.3-179980000-190020000.vcf.gz work/bwa_mem2.popdel_call.3-189980000-198022430/out/bwa_mem2.popdel_call.3-189980000-198022430.vcf.gz work/bwa_mem2.popdel_call.4-1-10020000/out/bwa_mem2.popdel_call.4-1-10020000.vcf.gz work/bwa_mem2.popdel_call.4-9980000-20020000/out/bwa_mem2.popdel_call.4-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.4-19980000-30020000/out/bwa_mem2.popdel_call.4-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.4-29980000-40020000/out/bwa_mem2.popdel_call.4-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.4-39980000-50020000/out/bwa_mem2.popdel_call.4-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.4-49980000-60020000/out/bwa_mem2.popdel_call.4-49980000-60020000.vcf.gz 
work/bwa_mem2.popdel_call.4-59980000-70020000/out/bwa_mem2.popdel_call.4-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.4-69980000-80020000/out/bwa_mem2.popdel_call.4-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.4-79980000-90020000/out/bwa_mem2.popdel_call.4-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.4-89980000-100020000/out/bwa_mem2.popdel_call.4-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.4-99980000-110020000/out/bwa_mem2.popdel_call.4-99980000-110020000.vcf.gz work/bwa_mem2.popdel_call.4-109980000-120020000/out/bwa_mem2.popdel_call.4-109980000-120020000.vcf.gz work/bwa_mem2.popdel_call.4-119980000-130020000/out/bwa_mem2.popdel_call.4-119980000-130020000.vcf.gz work/bwa_mem2.popdel_call.4-129980000-140020000/out/bwa_mem2.popdel_call.4-129980000-140020000.vcf.gz work/bwa_mem2.popdel_call.4-139980000-150020000/out/bwa_mem2.popdel_call.4-139980000-150020000.vcf.gz work/bwa_mem2.popdel_call.4-149980000-160020000/out/bwa_mem2.popdel_call.4-149980000-160020000.vcf.gz work/bwa_mem2.popdel_call.4-159980000-170020000/out/bwa_mem2.popdel_call.4-159980000-170020000.vcf.gz work/bwa_mem2.popdel_call.4-169980000-180020000/out/bwa_mem2.popdel_call.4-169980000-180020000.vcf.gz work/bwa_mem2.popdel_call.4-179980000-190020000/out/bwa_mem2.popdel_call.4-179980000-190020000.vcf.gz work/bwa_mem2.popdel_call.4-189980000-191154276/out/bwa_mem2.popdel_call.4-189980000-191154276.vcf.gz work/bwa_mem2.popdel_call.5-1-10020000/out/bwa_mem2.popdel_call.5-1-10020000.vcf.gz work/bwa_mem2.popdel_call.5-9980000-20020000/out/bwa_mem2.popdel_call.5-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.5-19980000-30020000/out/bwa_mem2.popdel_call.5-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.5-29980000-40020000/out/bwa_mem2.popdel_call.5-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.5-39980000-50020000/out/bwa_mem2.popdel_call.5-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.5-49980000-60020000/out/bwa_mem2.popdel_call.5-49980000-60020000.vcf.gz 
work/bwa_mem2.popdel_call.5-59980000-70020000/out/bwa_mem2.popdel_call.5-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.5-69980000-80020000/out/bwa_mem2.popdel_call.5-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.5-79980000-90020000/out/bwa_mem2.popdel_call.5-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.5-89980000-100020000/out/bwa_mem2.popdel_call.5-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.5-99980000-110020000/out/bwa_mem2.popdel_call.5-99980000-110020000.vcf.gz work/bwa_mem2.popdel_call.5-109980000-120020000/out/bwa_mem2.popdel_call.5-109980000-120020000.vcf.gz work/bwa_mem2.popdel_call.5-119980000-130020000/out/bwa_mem2.popdel_call.5-119980000-130020000.vcf.gz work/bwa_mem2.popdel_call.5-129980000-140020000/out/bwa_mem2.popdel_call.5-129980000-140020000.vcf.gz work/bwa_mem2.popdel_call.5-139980000-150020000/out/bwa_mem2.popdel_call.5-139980000-150020000.vcf.gz work/bwa_mem2.popdel_call.5-149980000-160020000/out/bwa_mem2.popdel_call.5-149980000-160020000.vcf.gz work/bwa_mem2.popdel_call.5-159980000-170020000/out/bwa_mem2.popdel_call.5-159980000-170020000.vcf.gz work/bwa_mem2.popdel_call.5-169980000-180020000/out/bwa_mem2.popdel_call.5-169980000-180020000.vcf.gz work/bwa_mem2.popdel_call.5-179980000-180915260/out/bwa_mem2.popdel_call.5-179980000-180915260.vcf.gz work/bwa_mem2.popdel_call.6-1-10020000/out/bwa_mem2.popdel_call.6-1-10020000.vcf.gz work/bwa_mem2.popdel_call.6-9980000-20020000/out/bwa_mem2.popdel_call.6-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.6-19980000-30020000/out/bwa_mem2.popdel_call.6-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.6-29980000-40020000/out/bwa_mem2.popdel_call.6-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.6-39980000-50020000/out/bwa_mem2.popdel_call.6-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.6-49980000-60020000/out/bwa_mem2.popdel_call.6-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.6-59980000-70020000/out/bwa_mem2.popdel_call.6-59980000-70020000.vcf.gz 
work/bwa_mem2.popdel_call.6-69980000-80020000/out/bwa_mem2.popdel_call.6-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.6-79980000-90020000/out/bwa_mem2.popdel_call.6-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.6-89980000-100020000/out/bwa_mem2.popdel_call.6-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.6-99980000-110020000/out/bwa_mem2.popdel_call.6-99980000-110020000.vcf.gz work/bwa_mem2.popdel_call.6-109980000-120020000/out/bwa_mem2.popdel_call.6-109980000-120020000.vcf.gz work/bwa_mem2.popdel_call.6-119980000-130020000/out/bwa_mem2.popdel_call.6-119980000-130020000.vcf.gz work/bwa_mem2.popdel_call.6-129980000-140020000/out/bwa_mem2.popdel_call.6-129980000-140020000.vcf.gz work/bwa_mem2.popdel_call.6-139980000-150020000/out/bwa_mem2.popdel_call.6-139980000-150020000.vcf.gz work/bwa_mem2.popdel_call.6-149980000-160020000/out/bwa_mem2.popdel_call.6-149980000-160020000.vcf.gz work/bwa_mem2.popdel_call.6-159980000-170020000/out/bwa_mem2.popdel_call.6-159980000-170020000.vcf.gz work/bwa_mem2.popdel_call.6-169980000-171115067/out/bwa_mem2.popdel_call.6-169980000-171115067.vcf.gz work/bwa_mem2.popdel_call.7-1-10020000/out/bwa_mem2.popdel_call.7-1-10020000.vcf.gz work/bwa_mem2.popdel_call.7-9980000-20020000/out/bwa_mem2.popdel_call.7-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.7-19980000-30020000/out/bwa_mem2.popdel_call.7-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.7-29980000-40020000/out/bwa_mem2.popdel_call.7-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.7-39980000-50020000/out/bwa_mem2.popdel_call.7-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.7-49980000-60020000/out/bwa_mem2.popdel_call.7-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.7-59980000-70020000/out/bwa_mem2.popdel_call.7-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.7-69980000-80020000/out/bwa_mem2.popdel_call.7-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.7-79980000-90020000/out/bwa_mem2.popdel_call.7-79980000-90020000.vcf.gz 
work/bwa_mem2.popdel_call.7-89980000-100020000/out/bwa_mem2.popdel_call.7-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.7-99980000-110020000/out/bwa_mem2.popdel_call.7-99980000-110020000.vcf.gz work/bwa_mem2.popdel_call.7-109980000-120020000/out/bwa_mem2.popdel_call.7-109980000-120020000.vcf.gz work/bwa_mem2.popdel_call.7-119980000-130020000/out/bwa_mem2.popdel_call.7-119980000-130020000.vcf.gz work/bwa_mem2.popdel_call.7-129980000-140020000/out/bwa_mem2.popdel_call.7-129980000-140020000.vcf.gz work/bwa_mem2.popdel_call.7-139980000-150020000/out/bwa_mem2.popdel_call.7-139980000-150020000.vcf.gz work/bwa_mem2.popdel_call.7-149980000-159138663/out/bwa_mem2.popdel_call.7-149980000-159138663.vcf.gz work/bwa_mem2.popdel_call.8-1-10020000/out/bwa_mem2.popdel_call.8-1-10020000.vcf.gz work/bwa_mem2.popdel_call.8-9980000-20020000/out/bwa_mem2.popdel_call.8-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.8-19980000-30020000/out/bwa_mem2.popdel_call.8-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.8-29980000-40020000/out/bwa_mem2.popdel_call.8-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.8-39980000-50020000/out/bwa_mem2.popdel_call.8-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.8-49980000-60020000/out/bwa_mem2.popdel_call.8-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.8-59980000-70020000/out/bwa_mem2.popdel_call.8-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.8-69980000-80020000/out/bwa_mem2.popdel_call.8-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.8-79980000-90020000/out/bwa_mem2.popdel_call.8-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.8-89980000-100020000/out/bwa_mem2.popdel_call.8-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.8-99980000-110020000/out/bwa_mem2.popdel_call.8-99980000-110020000.vcf.gz work/bwa_mem2.popdel_call.8-109980000-120020000/out/bwa_mem2.popdel_call.8-109980000-120020000.vcf.gz work/bwa_mem2.popdel_call.8-119980000-130020000/out/bwa_mem2.popdel_call.8-119980000-130020000.vcf.gz 
work/bwa_mem2.popdel_call.8-129980000-140020000/out/bwa_mem2.popdel_call.8-129980000-140020000.vcf.gz work/bwa_mem2.popdel_call.8-139980000-146364022/out/bwa_mem2.popdel_call.8-139980000-146364022.vcf.gz work/bwa_mem2.popdel_call.9-1-10020000/out/bwa_mem2.popdel_call.9-1-10020000.vcf.gz work/bwa_mem2.popdel_call.9-9980000-20020000/out/bwa_mem2.popdel_call.9-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.9-19980000-30020000/out/bwa_mem2.popdel_call.9-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.9-29980000-40020000/out/bwa_mem2.popdel_call.9-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.9-39980000-50020000/out/bwa_mem2.popdel_call.9-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.9-49980000-60020000/out/bwa_mem2.popdel_call.9-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.9-59980000-70020000/out/bwa_mem2.popdel_call.9-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.9-69980000-80020000/out/bwa_mem2.popdel_call.9-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.9-79980000-90020000/out/bwa_mem2.popdel_call.9-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.9-89980000-100020000/out/bwa_mem2.popdel_call.9-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.9-99980000-110020000/out/bwa_mem2.popdel_call.9-99980000-110020000.vcf.gz work/bwa_mem2.popdel_call.9-109980000-120020000/out/bwa_mem2.popdel_call.9-109980000-120020000.vcf.gz work/bwa_mem2.popdel_call.9-119980000-130020000/out/bwa_mem2.popdel_call.9-119980000-130020000.vcf.gz work/bwa_mem2.popdel_call.9-129980000-140020000/out/bwa_mem2.popdel_call.9-129980000-140020000.vcf.gz work/bwa_mem2.popdel_call.9-139980000-141213431/out/bwa_mem2.popdel_call.9-139980000-141213431.vcf.gz work/bwa_mem2.popdel_call.10-1-10020000/out/bwa_mem2.popdel_call.10-1-10020000.vcf.gz work/bwa_mem2.popdel_call.10-9980000-20020000/out/bwa_mem2.popdel_call.10-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.10-19980000-30020000/out/bwa_mem2.popdel_call.10-19980000-30020000.vcf.gz 
work/bwa_mem2.popdel_call.10-29980000-40020000/out/bwa_mem2.popdel_call.10-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.10-39980000-50020000/out/bwa_mem2.popdel_call.10-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.10-49980000-60020000/out/bwa_mem2.popdel_call.10-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.10-59980000-70020000/out/bwa_mem2.popdel_call.10-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.10-69980000-80020000/out/bwa_mem2.popdel_call.10-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.10-79980000-90020000/out/bwa_mem2.popdel_call.10-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.10-89980000-100020000/out/bwa_mem2.popdel_call.10-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.10-99980000-110020000/out/bwa_mem2.popdel_call.10-99980000-110020000.vcf.gz work/bwa_mem2.popdel_call.10-109980000-120020000/out/bwa_mem2.popdel_call.10-109980000-120020000.vcf.gz work/bwa_mem2.popdel_call.10-119980000-130020000/out/bwa_mem2.popdel_call.10-119980000-130020000.vcf.gz work/bwa_mem2.popdel_call.10-129980000-135534747/out/bwa_mem2.popdel_call.10-129980000-135534747.vcf.gz work/bwa_mem2.popdel_call.11-1-10020000/out/bwa_mem2.popdel_call.11-1-10020000.vcf.gz work/bwa_mem2.popdel_call.11-9980000-20020000/out/bwa_mem2.popdel_call.11-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.11-19980000-30020000/out/bwa_mem2.popdel_call.11-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.11-29980000-40020000/out/bwa_mem2.popdel_call.11-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.11-39980000-50020000/out/bwa_mem2.popdel_call.11-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.11-49980000-60020000/out/bwa_mem2.popdel_call.11-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.11-59980000-70020000/out/bwa_mem2.popdel_call.11-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.11-69980000-80020000/out/bwa_mem2.popdel_call.11-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.11-79980000-90020000/out/bwa_mem2.popdel_call.11-79980000-90020000.vcf.gz 
work/bwa_mem2.popdel_call.11-89980000-100020000/out/bwa_mem2.popdel_call.11-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.11-99980000-110020000/out/bwa_mem2.popdel_call.11-99980000-110020000.vcf.gz work/bwa_mem2.popdel_call.11-109980000-120020000/out/bwa_mem2.popdel_call.11-109980000-120020000.vcf.gz work/bwa_mem2.popdel_call.11-119980000-130020000/out/bwa_mem2.popdel_call.11-119980000-130020000.vcf.gz work/bwa_mem2.popdel_call.11-129980000-135006516/out/bwa_mem2.popdel_call.11-129980000-135006516.vcf.gz work/bwa_mem2.popdel_call.12-1-10020000/out/bwa_mem2.popdel_call.12-1-10020000.vcf.gz work/bwa_mem2.popdel_call.12-9980000-20020000/out/bwa_mem2.popdel_call.12-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.12-19980000-30020000/out/bwa_mem2.popdel_call.12-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.12-29980000-40020000/out/bwa_mem2.popdel_call.12-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.12-39980000-50020000/out/bwa_mem2.popdel_call.12-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.12-49980000-60020000/out/bwa_mem2.popdel_call.12-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.12-59980000-70020000/out/bwa_mem2.popdel_call.12-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.12-69980000-80020000/out/bwa_mem2.popdel_call.12-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.12-79980000-90020000/out/bwa_mem2.popdel_call.12-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.12-89980000-100020000/out/bwa_mem2.popdel_call.12-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.12-99980000-110020000/out/bwa_mem2.popdel_call.12-99980000-110020000.vcf.gz work/bwa_mem2.popdel_call.12-109980000-120020000/out/bwa_mem2.popdel_call.12-109980000-120020000.vcf.gz work/bwa_mem2.popdel_call.12-119980000-130020000/out/bwa_mem2.popdel_call.12-119980000-130020000.vcf.gz work/bwa_mem2.popdel_call.12-129980000-133851895/out/bwa_mem2.popdel_call.12-129980000-133851895.vcf.gz 
work/bwa_mem2.popdel_call.13-1-10020000/out/bwa_mem2.popdel_call.13-1-10020000.vcf.gz work/bwa_mem2.popdel_call.13-9980000-20020000/out/bwa_mem2.popdel_call.13-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.13-19980000-30020000/out/bwa_mem2.popdel_call.13-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.13-29980000-40020000/out/bwa_mem2.popdel_call.13-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.13-39980000-50020000/out/bwa_mem2.popdel_call.13-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.13-49980000-60020000/out/bwa_mem2.popdel_call.13-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.13-59980000-70020000/out/bwa_mem2.popdel_call.13-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.13-69980000-80020000/out/bwa_mem2.popdel_call.13-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.13-79980000-90020000/out/bwa_mem2.popdel_call.13-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.13-89980000-100020000/out/bwa_mem2.popdel_call.13-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.13-99980000-110020000/out/bwa_mem2.popdel_call.13-99980000-110020000.vcf.gz work/bwa_mem2.popdel_call.13-109980000-115169878/out/bwa_mem2.popdel_call.13-109980000-115169878.vcf.gz work/bwa_mem2.popdel_call.14-1-10020000/out/bwa_mem2.popdel_call.14-1-10020000.vcf.gz work/bwa_mem2.popdel_call.14-9980000-20020000/out/bwa_mem2.popdel_call.14-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.14-19980000-30020000/out/bwa_mem2.popdel_call.14-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.14-29980000-40020000/out/bwa_mem2.popdel_call.14-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.14-39980000-50020000/out/bwa_mem2.popdel_call.14-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.14-49980000-60020000/out/bwa_mem2.popdel_call.14-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.14-59980000-70020000/out/bwa_mem2.popdel_call.14-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.14-69980000-80020000/out/bwa_mem2.popdel_call.14-69980000-80020000.vcf.gz 
work/bwa_mem2.popdel_call.14-79980000-90020000/out/bwa_mem2.popdel_call.14-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.14-89980000-100020000/out/bwa_mem2.popdel_call.14-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.14-99980000-107349540/out/bwa_mem2.popdel_call.14-99980000-107349540.vcf.gz work/bwa_mem2.popdel_call.15-1-10020000/out/bwa_mem2.popdel_call.15-1-10020000.vcf.gz work/bwa_mem2.popdel_call.15-9980000-20020000/out/bwa_mem2.popdel_call.15-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.15-19980000-30020000/out/bwa_mem2.popdel_call.15-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.15-29980000-40020000/out/bwa_mem2.popdel_call.15-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.15-39980000-50020000/out/bwa_mem2.popdel_call.15-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.15-49980000-60020000/out/bwa_mem2.popdel_call.15-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.15-59980000-70020000/out/bwa_mem2.popdel_call.15-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.15-69980000-80020000/out/bwa_mem2.popdel_call.15-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.15-79980000-90020000/out/bwa_mem2.popdel_call.15-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.15-89980000-100020000/out/bwa_mem2.popdel_call.15-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.15-99980000-102531392/out/bwa_mem2.popdel_call.15-99980000-102531392.vcf.gz work/bwa_mem2.popdel_call.16-1-10020000/out/bwa_mem2.popdel_call.16-1-10020000.vcf.gz work/bwa_mem2.popdel_call.16-9980000-20020000/out/bwa_mem2.popdel_call.16-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.16-19980000-30020000/out/bwa_mem2.popdel_call.16-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.16-29980000-40020000/out/bwa_mem2.popdel_call.16-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.16-39980000-50020000/out/bwa_mem2.popdel_call.16-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.16-49980000-60020000/out/bwa_mem2.popdel_call.16-49980000-60020000.vcf.gz 
work/bwa_mem2.popdel_call.16-59980000-70020000/out/bwa_mem2.popdel_call.16-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.16-69980000-80020000/out/bwa_mem2.popdel_call.16-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.16-79980000-90020000/out/bwa_mem2.popdel_call.16-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.16-89980000-90354753/out/bwa_mem2.popdel_call.16-89980000-90354753.vcf.gz work/bwa_mem2.popdel_call.17-1-10020000/out/bwa_mem2.popdel_call.17-1-10020000.vcf.gz work/bwa_mem2.popdel_call.17-9980000-20020000/out/bwa_mem2.popdel_call.17-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.17-19980000-30020000/out/bwa_mem2.popdel_call.17-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.17-29980000-40020000/out/bwa_mem2.popdel_call.17-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.17-39980000-50020000/out/bwa_mem2.popdel_call.17-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.17-49980000-60020000/out/bwa_mem2.popdel_call.17-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.17-59980000-70020000/out/bwa_mem2.popdel_call.17-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.17-69980000-80020000/out/bwa_mem2.popdel_call.17-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.17-79980000-81195210/out/bwa_mem2.popdel_call.17-79980000-81195210.vcf.gz work/bwa_mem2.popdel_call.18-1-10020000/out/bwa_mem2.popdel_call.18-1-10020000.vcf.gz work/bwa_mem2.popdel_call.18-9980000-20020000/out/bwa_mem2.popdel_call.18-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.18-19980000-30020000/out/bwa_mem2.popdel_call.18-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.18-29980000-40020000/out/bwa_mem2.popdel_call.18-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.18-39980000-50020000/out/bwa_mem2.popdel_call.18-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.18-49980000-60020000/out/bwa_mem2.popdel_call.18-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.18-59980000-70020000/out/bwa_mem2.popdel_call.18-59980000-70020000.vcf.gz 
work/bwa_mem2.popdel_call.18-69980000-78077248/out/bwa_mem2.popdel_call.18-69980000-78077248.vcf.gz work/bwa_mem2.popdel_call.19-1-10020000/out/bwa_mem2.popdel_call.19-1-10020000.vcf.gz work/bwa_mem2.popdel_call.19-9980000-20020000/out/bwa_mem2.popdel_call.19-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.19-19980000-30020000/out/bwa_mem2.popdel_call.19-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.19-29980000-40020000/out/bwa_mem2.popdel_call.19-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.19-39980000-50020000/out/bwa_mem2.popdel_call.19-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.19-49980000-59128983/out/bwa_mem2.popdel_call.19-49980000-59128983.vcf.gz work/bwa_mem2.popdel_call.20-1-10020000/out/bwa_mem2.popdel_call.20-1-10020000.vcf.gz work/bwa_mem2.popdel_call.20-9980000-20020000/out/bwa_mem2.popdel_call.20-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.20-19980000-30020000/out/bwa_mem2.popdel_call.20-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.20-29980000-40020000/out/bwa_mem2.popdel_call.20-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.20-39980000-50020000/out/bwa_mem2.popdel_call.20-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.20-49980000-60020000/out/bwa_mem2.popdel_call.20-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.20-59980000-63025520/out/bwa_mem2.popdel_call.20-59980000-63025520.vcf.gz work/bwa_mem2.popdel_call.21-1-10020000/out/bwa_mem2.popdel_call.21-1-10020000.vcf.gz work/bwa_mem2.popdel_call.21-9980000-20020000/out/bwa_mem2.popdel_call.21-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.21-19980000-30020000/out/bwa_mem2.popdel_call.21-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.21-29980000-40020000/out/bwa_mem2.popdel_call.21-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.21-39980000-48129895/out/bwa_mem2.popdel_call.21-39980000-48129895.vcf.gz work/bwa_mem2.popdel_call.22-1-10020000/out/bwa_mem2.popdel_call.22-1-10020000.vcf.gz 
work/bwa_mem2.popdel_call.22-9980000-20020000/out/bwa_mem2.popdel_call.22-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.22-19980000-30020000/out/bwa_mem2.popdel_call.22-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.22-29980000-40020000/out/bwa_mem2.popdel_call.22-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.22-39980000-50020000/out/bwa_mem2.popdel_call.22-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.22-49980000-51304566/out/bwa_mem2.popdel_call.22-49980000-51304566.vcf.gz work/bwa_mem2.popdel_call.X-1-10020000/out/bwa_mem2.popdel_call.X-1-10020000.vcf.gz work/bwa_mem2.popdel_call.X-9980000-20020000/out/bwa_mem2.popdel_call.X-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.X-19980000-30020000/out/bwa_mem2.popdel_call.X-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.X-29980000-40020000/out/bwa_mem2.popdel_call.X-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.X-39980000-50020000/out/bwa_mem2.popdel_call.X-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.X-49980000-60020000/out/bwa_mem2.popdel_call.X-49980000-60020000.vcf.gz work/bwa_mem2.popdel_call.X-59980000-70020000/out/bwa_mem2.popdel_call.X-59980000-70020000.vcf.gz work/bwa_mem2.popdel_call.X-69980000-80020000/out/bwa_mem2.popdel_call.X-69980000-80020000.vcf.gz work/bwa_mem2.popdel_call.X-79980000-90020000/out/bwa_mem2.popdel_call.X-79980000-90020000.vcf.gz work/bwa_mem2.popdel_call.X-89980000-100020000/out/bwa_mem2.popdel_call.X-89980000-100020000.vcf.gz work/bwa_mem2.popdel_call.X-99980000-110020000/out/bwa_mem2.popdel_call.X-99980000-110020000.vcf.gz work/bwa_mem2.popdel_call.X-109980000-120020000/out/bwa_mem2.popdel_call.X-109980000-120020000.vcf.gz work/bwa_mem2.popdel_call.X-119980000-130020000/out/bwa_mem2.popdel_call.X-119980000-130020000.vcf.gz work/bwa_mem2.popdel_call.X-129980000-140020000/out/bwa_mem2.popdel_call.X-129980000-140020000.vcf.gz work/bwa_mem2.popdel_call.X-139980000-150020000/out/bwa_mem2.popdel_call.X-139980000-150020000.vcf.gz 
work/bwa_mem2.popdel_call.X-149980000-155270560/out/bwa_mem2.popdel_call.X-149980000-155270560.vcf.gz work/bwa_mem2.popdel_call.Y-1-10020000/out/bwa_mem2.popdel_call.Y-1-10020000.vcf.gz work/bwa_mem2.popdel_call.Y-9980000-20020000/out/bwa_mem2.popdel_call.Y-9980000-20020000.vcf.gz work/bwa_mem2.popdel_call.Y-19980000-30020000/out/bwa_mem2.popdel_call.Y-19980000-30020000.vcf.gz work/bwa_mem2.popdel_call.Y-29980000-40020000/out/bwa_mem2.popdel_call.Y-29980000-40020000.vcf.gz work/bwa_mem2.popdel_call.Y-39980000-50020000/out/bwa_mem2.popdel_call.Y-39980000-50020000.vcf.gz work/bwa_mem2.popdel_call.Y-49980000-59373566/out/bwa_mem2.popdel_call.Y-49980000-59373566.vcf.gz work/bwa_mem2.popdel_call.MT-1-16569/out/bwa_mem2.popdel_call.MT-1-16569.vcf.gz work/bwa_mem2.popdel_call.GL000207__dot__1-1-4262/out/bwa_mem2.popdel_call.GL000207__dot__1-1-4262.vcf.gz work/bwa_mem2.popdel_call.GL000226__dot__1-1-15008/out/bwa_mem2.popdel_call.GL000226__dot__1-1-15008.vcf.gz work/bwa_mem2.popdel_call.GL000229__dot__1-1-19913/out/bwa_mem2.popdel_call.GL000229__dot__1-1-19913.vcf.gz work/bwa_mem2.popdel_call.GL000231__dot__1-1-27386/out/bwa_mem2.popdel_call.GL000231__dot__1-1-27386.vcf.gz work/bwa_mem2.popdel_call.GL000210__dot__1-1-27682/out/bwa_mem2.popdel_call.GL000210__dot__1-1-27682.vcf.gz work/bwa_mem2.popdel_call.GL000239__dot__1-1-33824/out/bwa_mem2.popdel_call.GL000239__dot__1-1-33824.vcf.gz work/bwa_mem2.popdel_call.GL000235__dot__1-1-34474/out/bwa_mem2.popdel_call.GL000235__dot__1-1-34474.vcf.gz work/bwa_mem2.popdel_call.GL000201__dot__1-1-36148/out/bwa_mem2.popdel_call.GL000201__dot__1-1-36148.vcf.gz work/bwa_mem2.popdel_call.GL000247__dot__1-1-36422/out/bwa_mem2.popdel_call.GL000247__dot__1-1-36422.vcf.gz work/bwa_mem2.popdel_call.GL000245__dot__1-1-36651/out/bwa_mem2.popdel_call.GL000245__dot__1-1-36651.vcf.gz work/bwa_mem2.popdel_call.GL000197__dot__1-1-37175/out/bwa_mem2.popdel_call.GL000197__dot__1-1-37175.vcf.gz 
work/bwa_mem2.popdel_call.GL000203__dot__1-1-37498/out/bwa_mem2.popdel_call.GL000203__dot__1-1-37498.vcf.gz work/bwa_mem2.popdel_call.GL000246__dot__1-1-38154/out/bwa_mem2.popdel_call.GL000246__dot__1-1-38154.vcf.gz work/bwa_mem2.popdel_call.GL000249__dot__1-1-38502/out/bwa_mem2.popdel_call.GL000249__dot__1-1-38502.vcf.gz work/bwa_mem2.popdel_call.GL000196__dot__1-1-38914/out/bwa_mem2.popdel_call.GL000196__dot__1-1-38914.vcf.gz work/bwa_mem2.popdel_call.GL000248__dot__1-1-39786/out/bwa_mem2.popdel_call.GL000248__dot__1-1-39786.vcf.gz work/bwa_mem2.popdel_call.GL000244__dot__1-1-39929/out/bwa_mem2.popdel_call.GL000244__dot__1-1-39929.vcf.gz work/bwa_mem2.popdel_call.GL000238__dot__1-1-39939/out/bwa_mem2.popdel_call.GL000238__dot__1-1-39939.vcf.gz work/bwa_mem2.popdel_call.GL000202__dot__1-1-40103/out/bwa_mem2.popdel_call.GL000202__dot__1-1-40103.vcf.gz work/bwa_mem2.popdel_call.GL000234__dot__1-1-40531/out/bwa_mem2.popdel_call.GL000234__dot__1-1-40531.vcf.gz work/bwa_mem2.popdel_call.GL000232__dot__1-1-40652/out/bwa_mem2.popdel_call.GL000232__dot__1-1-40652.vcf.gz work/bwa_mem2.popdel_call.GL000206__dot__1-1-41001/out/bwa_mem2.popdel_call.GL000206__dot__1-1-41001.vcf.gz work/bwa_mem2.popdel_call.GL000240__dot__1-1-41933/out/bwa_mem2.popdel_call.GL000240__dot__1-1-41933.vcf.gz work/bwa_mem2.popdel_call.GL000236__dot__1-1-41934/out/bwa_mem2.popdel_call.GL000236__dot__1-1-41934.vcf.gz work/bwa_mem2.popdel_call.GL000241__dot__1-1-42152/out/bwa_mem2.popdel_call.GL000241__dot__1-1-42152.vcf.gz work/bwa_mem2.popdel_call.GL000243__dot__1-1-43341/out/bwa_mem2.popdel_call.GL000243__dot__1-1-43341.vcf.gz work/bwa_mem2.popdel_call.GL000242__dot__1-1-43523/out/bwa_mem2.popdel_call.GL000242__dot__1-1-43523.vcf.gz work/bwa_mem2.popdel_call.GL000230__dot__1-1-43691/out/bwa_mem2.popdel_call.GL000230__dot__1-1-43691.vcf.gz work/bwa_mem2.popdel_call.GL000237__dot__1-1-45867/out/bwa_mem2.popdel_call.GL000237__dot__1-1-45867.vcf.gz 
work/bwa_mem2.popdel_call.GL000233__dot__1-1-45941/out/bwa_mem2.popdel_call.GL000233__dot__1-1-45941.vcf.gz work/bwa_mem2.popdel_call.GL000204__dot__1-1-81310/out/bwa_mem2.popdel_call.GL000204__dot__1-1-81310.vcf.gz work/bwa_mem2.popdel_call.GL000198__dot__1-1-90085/out/bwa_mem2.popdel_call.GL000198__dot__1-1-90085.vcf.gz work/bwa_mem2.popdel_call.GL000208__dot__1-1-92689/out/bwa_mem2.popdel_call.GL000208__dot__1-1-92689.vcf.gz work/bwa_mem2.popdel_call.GL000191__dot__1-1-106433/out/bwa_mem2.popdel_call.GL000191__dot__1-1-106433.vcf.gz work/bwa_mem2.popdel_call.GL000227__dot__1-1-128374/out/bwa_mem2.popdel_call.GL000227__dot__1-1-128374.vcf.gz work/bwa_mem2.popdel_call.GL000228__dot__1-1-129120/out/bwa_mem2.popdel_call.GL000228__dot__1-1-129120.vcf.gz work/bwa_mem2.popdel_call.GL000214__dot__1-1-137718/out/bwa_mem2.popdel_call.GL000214__dot__1-1-137718.vcf.gz work/bwa_mem2.popdel_call.GL000221__dot__1-1-155397/out/bwa_mem2.popdel_call.GL000221__dot__1-1-155397.vcf.gz work/bwa_mem2.popdel_call.GL000209__dot__1-1-159169/out/bwa_mem2.popdel_call.GL000209__dot__1-1-159169.vcf.gz work/bwa_mem2.popdel_call.GL000218__dot__1-1-161147/out/bwa_mem2.popdel_call.GL000218__dot__1-1-161147.vcf.gz work/bwa_mem2.popdel_call.GL000220__dot__1-1-161802/out/bwa_mem2.popdel_call.GL000220__dot__1-1-161802.vcf.gz work/bwa_mem2.popdel_call.GL000213__dot__1-1-164239/out/bwa_mem2.popdel_call.GL000213__dot__1-1-164239.vcf.gz work/bwa_mem2.popdel_call.GL000211__dot__1-1-166566/out/bwa_mem2.popdel_call.GL000211__dot__1-1-166566.vcf.gz work/bwa_mem2.popdel_call.GL000199__dot__1-1-169874/out/bwa_mem2.popdel_call.GL000199__dot__1-1-169874.vcf.gz work/bwa_mem2.popdel_call.GL000217__dot__1-1-172149/out/bwa_mem2.popdel_call.GL000217__dot__1-1-172149.vcf.gz work/bwa_mem2.popdel_call.GL000216__dot__1-1-172294/out/bwa_mem2.popdel_call.GL000216__dot__1-1-172294.vcf.gz work/bwa_mem2.popdel_call.GL000215__dot__1-1-172545/out/bwa_mem2.popdel_call.GL000215__dot__1-1-172545.vcf.gz 
work/bwa_mem2.popdel_call.GL000205__dot__1-1-174588/out/bwa_mem2.popdel_call.GL000205__dot__1-1-174588.vcf.gz work/bwa_mem2.popdel_call.GL000219__dot__1-1-179198/out/bwa_mem2.popdel_call.GL000219__dot__1-1-179198.vcf.gz work/bwa_mem2.popdel_call.GL000224__dot__1-1-179693/out/bwa_mem2.popdel_call.GL000224__dot__1-1-179693.vcf.gz work/bwa_mem2.popdel_call.GL000223__dot__1-1-180455/out/bwa_mem2.popdel_call.GL000223__dot__1-1-180455.vcf.gz work/bwa_mem2.popdel_call.GL000195__dot__1-1-182896/out/bwa_mem2.popdel_call.GL000195__dot__1-1-182896.vcf.gz work/bwa_mem2.popdel_call.GL000212__dot__1-1-186858/out/bwa_mem2.popdel_call.GL000212__dot__1-1-186858.vcf.gz work/bwa_mem2.popdel_call.GL000222__dot__1-1-186861/out/bwa_mem2.popdel_call.GL000222__dot__1-1-186861.vcf.gz work/bwa_mem2.popdel_call.GL000200__dot__1-1-187035/out/bwa_mem2.popdel_call.GL000200__dot__1-1-187035.vcf.gz work/bwa_mem2.popdel_call.GL000193__dot__1-1-189789/out/bwa_mem2.popdel_call.GL000193__dot__1-1-189789.vcf.gz work/bwa_mem2.popdel_call.GL000194__dot__1-1-191469/out/bwa_mem2.popdel_call.GL000194__dot__1-1-191469.vcf.gz work/bwa_mem2.popdel_call.GL000225__dot__1-1-211173/out/bwa_mem2.popdel_call.GL000225__dot__1-1-211173.vcf.gz work/bwa_mem2.popdel_call.GL000192__dot__1-1-547496/out/bwa_mem2.popdel_call.GL000192__dot__1-1-547496.vcf.gz; Date=Mon Jan 23 14:31:37 2023 +##INFO= +##INFO= +##bcftools_viewVersion=1.16+htslib-1.16 +##bcftools_viewCommand=view --samples-file /data/cephfs-1/scratch/groups/cubi/holtgrem_c/tmp/hpc-cpu-8/20230123/tmp.gEa7PqtfGr/samples.txt --output-type u work/bwa_mem2.popdel_concat_calls/out/bwa_mem2.popdel_concat_calls.vcf.gz; Date=Mon Jan 23 14:34:09 2023 +##bcftools_viewCommand=view --output-file work/bwa_mem2.popdel.11_0351-N1-DNA1-WGS1/out/bwa_mem2.popdel.11_0351-N1-DNA1-WGS1.vcf.gz --output-type z --include '(GT !~ "\.") && (GT ~ "1")'; Date=Mon Jan 23 14:34:09 2023 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother +1 1224181 . 
N 100 PASS IMPRECISE;SVLEN=-1621;END=1225801;SVTYPE=DEL;AF=0.151466;LR=1048.74;SVMETHOD=PopDelv1.1.2;YIELD=1;SWIN=480;AC=3;AN=6 GT:PL:GQ:LAD:DAD:FL:FLD 0/1:4,0,30:4:0,0,1:0,0,0,1,0:1223400,1224000:600 0/1:7,0,36:7:0,0,1:0,0,0,1,0:1223490,1223970:480 0/1:80,0,7:7:0,1,5:0,0,1,3,2:1223340,1225380:2040 From 5b78d351b0cd5a60ab6edf525f9cbf304b2e01a1 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Fri, 6 Oct 2023 14:34:45 +0200 Subject: [PATCH 03/13] wip --- src/common.rs | 286 +++++++++--------- src/db/to_bin/clinvar/mod.rs | 10 +- src/seqvars/ingest/header.rs | 14 +- src/seqvars/ingest/mod.rs | 13 +- ...mmon__test__open_read_maybe_gz@false.snap} | 0 ...ommon__test__open_read_maybe_gz@true.snap} | 0 ...mon__test__open_write_maybe_gz@false.snap} | 0 ...mmon__test__open_write_maybe_gz@true.snap} | 0 ...mmon__test__open_write_maybe_gz_plain.snap | 5 - src/strucvars/ingest/header.rs | 12 +- 10 files changed, 161 insertions(+), 179 deletions(-) rename src/snapshots/{varfish_server_worker__common__test__open_read_maybe_gz-2.snap => varfish_server_worker__common__test__open_read_maybe_gz@false.snap} (100%) rename src/snapshots/{varfish_server_worker__common__test__open_read_maybe_gz.snap => varfish_server_worker__common__test__open_read_maybe_gz@true.snap} (100%) rename src/snapshots/{varfish_server_worker__common__test__open_write_maybe_gz-2.snap => varfish_server_worker__common__test__open_write_maybe_gz@false.snap} (100%) rename src/snapshots/{varfish_server_worker__common__test__open_write_maybe_gz.snap => varfish_server_worker__common__test__open_write_maybe_gz@true.snap} (100%) delete mode 100644 src/snapshots/varfish_server_worker__common__test__open_write_maybe_gz_plain.snap diff --git a/src/common.rs b/src/common.rs index 7a2da75e..fb413397 100644 --- a/src/common.rs +++ b/src/common.rs @@ -234,6 +234,153 @@ mod tests { } } +/// Return the version of the `varfish-server-worker` crate and `x.y.z` in tests. 
+pub fn worker_version() -> &'static str { + if cfg!(test) { + "x.y.z" + } else { + env!("CARGO_PKG_VERSION") + } +} + +/// Add contigs for GRCh37. +pub fn add_contigs_37( + builder: vcf::header::Builder, +) -> Result { + use vcf::header::record::value::map::Contig; + use vcf::header::record::value::Map; + + let mut builder = builder; + + let specs: &[(&str, usize); 25] = &[ + ("1", 249250621), + ("2", 243199373), + ("3", 198022430), + ("4", 191154276), + ("5", 180915260), + ("6", 171115067), + ("7", 159138663), + ("8", 146364022), + ("9", 141213431), + ("10", 135534747), + ("11", 135006516), + ("12", 133851895), + ("13", 115169878), + ("14", 107349540), + ("15", 102531392), + ("16", 90354753), + ("17", 81195210), + ("18", 78077248), + ("19", 59128983), + ("20", 63025520), + ("21", 48129895), + ("22", 51304566), + ("X", 155270560), + ("Y", 59373566), + ("MT", 16569), + ]; + + for (contig, length) in specs { + builder = builder.add_contig( + contig + .parse() + .map_err(|_| anyhow::anyhow!("invalid contig: {}", contig))?, + Map::::builder() + .set_length(*length) + .insert( + "assembly" + .parse() + .map_err(|_| anyhow::anyhow!("invalid key: assembly"))?, + "GRCh37", + ) + .insert( + "species" + .parse() + .map_err(|_| anyhow::anyhow!("invalid key: species"))?, + "Homo sapiens", + ) + .build()?, + ); + } + + Ok(builder) +} + +/// Add contigs for GRCh38. 
+pub fn add_contigs_38( + builder: vcf::header::Builder, +) -> Result { + use vcf::header::record::value::map::Contig; + use vcf::header::record::value::Map; + + let mut builder = builder; + + let specs: &[(&str, usize); 25] = &[ + ("chr1", 248956422), + ("chr2", 242193529), + ("chr3", 198295559), + ("chr4", 190214555), + ("chr5", 181538259), + ("chr6", 170805979), + ("chr7", 159345973), + ("chr8", 145138636), + ("chr9", 138394717), + ("chr10", 133797422), + ("chr11", 135086622), + ("chr12", 133275309), + ("chr13", 114364328), + ("chr14", 107043718), + ("chr15", 101991189), + ("chr16", 90338345), + ("chr17", 83257441), + ("chr18", 80373285), + ("chr19", 58617616), + ("chr20", 64444167), + ("chr21", 46709983), + ("chr22", 50818468), + ("chrX", 156040895), + ("chrY", 57227415), + ("chrM", 16569), + ]; + + for (contig, length) in specs { + builder = builder.add_contig( + contig + .parse() + .map_err(|_| anyhow::anyhow!("invalid contig: {}", contig))?, + Map::::builder() + .set_length(*length) + .insert( + "assembly" + .parse() + .map_err(|_| anyhow::anyhow!("invalid key: assembly"))?, + "GRCh38", + ) + .insert( + "species" + .parse() + .map_err(|_| anyhow::anyhow!("invalid key: species"))?, + "Homo sapiens", + ) + .build()?, + ); + } + + Ok(builder) +} + +#[cfg(test)] +macro_rules! 
set_snapshot_suffix { + ($($expr:expr),*) => { + let mut settings = insta::Settings::clone_current(); + settings.set_snapshot_suffix(format!($($expr,)*)); + let _guard = settings.bind_to_scope(); + } +} + +#[cfg(test)] +pub(crate) use set_snapshot_suffix; + #[cfg(test)] mod test { use std::io::Read; @@ -253,6 +400,8 @@ mod test { #[case(true)] #[case(false)] fn open_write_maybe_gz(#[case] is_gzip: bool) -> Result<(), anyhow::Error> { + crate::common::set_snapshot_suffix!("{:?}", is_gzip); + let filename = if is_gzip { "test.txt" } else { "test.txt.gz" }; let tmp_dir = temp_testdir::TempDir::default(); @@ -274,6 +423,8 @@ mod test { #[case(true)] #[case(false)] fn open_read_maybe_gz(#[case] is_gzip: bool) -> Result<(), anyhow::Error> { + crate::common::set_snapshot_suffix!("{:?}", is_gzip); + let mut f = super::open_read_maybe_gz(if is_gzip { "tests/common/test.txt.gz" } else { @@ -386,138 +537,3 @@ mod test { Ok(()) } } - -/// Return the version of the `varfish-server-worker` crate and `x.y.z` in tests. -pub fn worker_version() -> &'static str { - if cfg!(test) { - "x.y.z" - } else { - env!("CARGO_PKG_VERSION") - } -} - -/// Add contigs for GRCh37. 
-pub fn add_contigs_37( - builder: vcf::header::Builder, -) -> Result { - use vcf::header::record::value::map::Contig; - use vcf::header::record::value::Map; - - let mut builder = builder; - - let specs: &[(&str, usize); 25] = &[ - ("1", 249250621), - ("2", 243199373), - ("3", 198022430), - ("4", 191154276), - ("5", 180915260), - ("6", 171115067), - ("7", 159138663), - ("8", 146364022), - ("9", 141213431), - ("10", 135534747), - ("11", 135006516), - ("12", 133851895), - ("13", 115169878), - ("14", 107349540), - ("15", 102531392), - ("16", 90354753), - ("17", 81195210), - ("18", 78077248), - ("19", 59128983), - ("20", 63025520), - ("21", 48129895), - ("22", 51304566), - ("X", 155270560), - ("Y", 59373566), - ("MT", 16569), - ]; - - for (contig, length) in specs { - builder = builder.add_contig( - contig - .parse() - .map_err(|_| anyhow::anyhow!("invalid contig: {}", contig))?, - Map::::builder() - .set_length(*length) - .insert( - "assembly" - .parse() - .map_err(|_| anyhow::anyhow!("invalid key: assembly"))?, - "GRCh37", - ) - .insert( - "species" - .parse() - .map_err(|_| anyhow::anyhow!("invalid key: species"))?, - "Homo sapiens", - ) - .build()?, - ); - } - - Ok(builder) -} - -/// Add contigs for GRCh38. 
-pub fn add_contigs_38( - builder: vcf::header::Builder, -) -> Result { - use vcf::header::record::value::map::Contig; - use vcf::header::record::value::Map; - - let mut builder = builder; - - let specs: &[(&str, usize); 25] = &[ - ("chr1", 248956422), - ("chr2", 242193529), - ("chr3", 198295559), - ("chr4", 190214555), - ("chr5", 181538259), - ("chr6", 170805979), - ("chr7", 159345973), - ("chr8", 145138636), - ("chr9", 138394717), - ("chr10", 133797422), - ("chr11", 135086622), - ("chr12", 133275309), - ("chr13", 114364328), - ("chr14", 107043718), - ("chr15", 101991189), - ("chr16", 90338345), - ("chr17", 83257441), - ("chr18", 80373285), - ("chr19", 58617616), - ("chr20", 64444167), - ("chr21", 46709983), - ("chr22", 50818468), - ("chrX", 156040895), - ("chrY", 57227415), - ("chrM", 16569), - ]; - - for (contig, length) in specs { - builder = builder.add_contig( - contig - .parse() - .map_err(|_| anyhow::anyhow!("invalid contig: {}", contig))?, - Map::::builder() - .set_length(*length) - .insert( - "assembly" - .parse() - .map_err(|_| anyhow::anyhow!("invalid key: assembly"))?, - "GRCh38", - ) - .insert( - "species" - .parse() - .map_err(|_| anyhow::anyhow!("invalid key: species"))?, - "Homo sapiens", - ) - .build()?, - ); - } - - Ok(builder) -} diff --git a/src/db/to_bin/clinvar/mod.rs b/src/db/to_bin/clinvar/mod.rs index 459dd6ae..9d9671f6 100644 --- a/src/db/to_bin/clinvar/mod.rs +++ b/src/db/to_bin/clinvar/mod.rs @@ -185,19 +185,11 @@ mod test { use crate::common::open_read_maybe_gz; use crate::db::to_bin::clinvar::input::Assembly; - macro_rules! 
set_snapshot_suffix { - ($($expr:expr),*) => { - let mut settings = insta::Settings::clone_current(); - settings.set_snapshot_suffix(format!($($expr,)*)); - let _guard = settings.bind_to_scope(); - } - } - #[rstest::rstest] #[case(Assembly::Grch37)] #[case(Assembly::Grch38)] fn run_convert_jsonl_to_protobuf(#[case] assembly: Assembly) -> Result<(), anyhow::Error> { - set_snapshot_suffix!("{:?}", assembly); + crate::common::set_snapshot_suffix!("{:?}", assembly); let reader = open_read_maybe_gz( "tests/db/to-bin/varfish-db-downloader/vardbs/clinvar/clinvar-svs.jsonl", )?; diff --git a/src/seqvars/ingest/header.rs b/src/seqvars/ingest/header.rs index ac857839..78ed64e5 100644 --- a/src/seqvars/ingest/header.rs +++ b/src/seqvars/ingest/header.rs @@ -421,21 +421,13 @@ mod test { use super::VariantCaller; - macro_rules! set_snapshot_suffix { - ($($expr:expr),*) => { - let mut settings = insta::Settings::clone_current(); - settings.set_snapshot_suffix(format!($($expr,)*)); - let _guard = settings.bind_to_scope(); - } - } - #[rstest] #[case("tests/seqvars/ingest/example_dragen.07.021.624.3.10.4.vcf")] #[case("tests/seqvars/ingest/example_dragen.07.021.624.3.10.9.vcf")] #[case("tests/seqvars/ingest/example_gatk_hc.3.7-0.vcf")] #[case("tests/seqvars/ingest/example_gatk_hc.4.4.0.0.vcf")] fn variant_caller_guess(#[case] path: &str) -> Result<(), anyhow::Error> { - set_snapshot_suffix!("{}", path.split('/').last().unwrap()); + crate::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); let vcf_header = noodles_vcf::reader::Builder .build_from_path(path)? 
@@ -452,7 +444,7 @@ mod test { #[case("tests/seqvars/ingest/example_gatk_hc.3.7-0.vcf")] #[case("tests/seqvars/ingest/example_gatk_hc.4.4.0.0.vcf")] fn build_output_header_37(#[case] path: &str) -> Result<(), anyhow::Error> { - set_snapshot_suffix!("{}", path.split('/').last().unwrap()); + crate::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); let tmpdir = temp_testdir::TempDir::default(); let pedigree = PedigreeByName::from_path(path.replace(".vcf", ".ped")).unwrap(); @@ -485,7 +477,7 @@ mod test { #[case("tests/seqvars/ingest/example_gatk_hc.3.7-0.vcf")] #[case("tests/seqvars/ingest/example_gatk_hc.4.4.0.0.vcf")] fn build_output_header_38(#[case] path: &str) -> Result<(), anyhow::Error> { - set_snapshot_suffix!("{}", path.split('/').last().unwrap()); + crate::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); let tmpdir = temp_testdir::TempDir::default(); let pedigree = PedigreeByName::from_path(path.replace(".vcf", ".ped")).unwrap(); diff --git a/src/seqvars/ingest/mod.rs b/src/seqvars/ingest/mod.rs index f6dbd9f5..0e69fa3b 100644 --- a/src/seqvars/ingest/mod.rs +++ b/src/seqvars/ingest/mod.rs @@ -472,14 +472,6 @@ mod test { use crate::common::GenomeRelease; - macro_rules! 
set_snapshot_suffix { - ($($expr:expr),*) => { - let mut settings = insta::Settings::clone_current(); - settings.set_snapshot_suffix(format!($($expr,)*)); - let _guard = settings.bind_to_scope(); - } - } - #[rstest] #[case("tests/seqvars/ingest/example_dragen.07.021.624.3.10.4.vcf")] #[case("tests/seqvars/ingest/example_dragen.07.021.624.3.10.9.vcf")] @@ -488,7 +480,10 @@ mod test { #[case("tests/seqvars/ingest/NA12878_dragen.vcf")] #[case("tests/seqvars/ingest/Case_1.vcf")] fn result_snapshot_test(#[case] path: &str) -> Result<(), anyhow::Error> { - set_snapshot_suffix!("{}", path.split('/').last().unwrap().replace('.', "_")); + crate::common::set_snapshot_suffix!( + "{}", + path.split('/').last().unwrap().replace('.', "_") + ); let tmpdir = temp_testdir::TempDir::default(); diff --git a/src/snapshots/varfish_server_worker__common__test__open_read_maybe_gz-2.snap b/src/snapshots/varfish_server_worker__common__test__open_read_maybe_gz@false.snap similarity index 100% rename from src/snapshots/varfish_server_worker__common__test__open_read_maybe_gz-2.snap rename to src/snapshots/varfish_server_worker__common__test__open_read_maybe_gz@false.snap diff --git a/src/snapshots/varfish_server_worker__common__test__open_read_maybe_gz.snap b/src/snapshots/varfish_server_worker__common__test__open_read_maybe_gz@true.snap similarity index 100% rename from src/snapshots/varfish_server_worker__common__test__open_read_maybe_gz.snap rename to src/snapshots/varfish_server_worker__common__test__open_read_maybe_gz@true.snap diff --git a/src/snapshots/varfish_server_worker__common__test__open_write_maybe_gz-2.snap b/src/snapshots/varfish_server_worker__common__test__open_write_maybe_gz@false.snap similarity index 100% rename from src/snapshots/varfish_server_worker__common__test__open_write_maybe_gz-2.snap rename to src/snapshots/varfish_server_worker__common__test__open_write_maybe_gz@false.snap diff --git a/src/snapshots/varfish_server_worker__common__test__open_write_maybe_gz.snap 
b/src/snapshots/varfish_server_worker__common__test__open_write_maybe_gz@true.snap similarity index 100% rename from src/snapshots/varfish_server_worker__common__test__open_write_maybe_gz.snap rename to src/snapshots/varfish_server_worker__common__test__open_write_maybe_gz@true.snap diff --git a/src/snapshots/varfish_server_worker__common__test__open_write_maybe_gz_plain.snap b/src/snapshots/varfish_server_worker__common__test__open_write_maybe_gz_plain.snap deleted file mode 100644 index a7fbc40b..00000000 --- a/src/snapshots/varfish_server_worker__common__test__open_write_maybe_gz_plain.snap +++ /dev/null @@ -1,5 +0,0 @@ ---- -source: src/common.rs -expression: "format!(\"{:x?}\", & buf)" ---- -[] diff --git a/src/strucvars/ingest/header.rs b/src/strucvars/ingest/header.rs index b39683ab..06d3cae2 100644 --- a/src/strucvars/ingest/header.rs +++ b/src/strucvars/ingest/header.rs @@ -254,14 +254,6 @@ mod test { use mehari::ped::PedigreeByName; use rstest::rstest; - macro_rules! set_snapshot_suffix { - ($($expr:expr),*) => { - let mut settings = insta::Settings::clone_current(); - settings.set_snapshot_suffix(format!($($expr,)*)); - let _guard = settings.bind_to_scope(); - } - } - #[rstest] #[case("tests/db/strucvars/ingest/delly2-min.vcf")] #[case("tests/db/strucvars/ingest/dragen-cnv-min.vcf")] @@ -272,7 +264,7 @@ mod test { #[case("tests/db/strucvars/ingest/popdel-min.vcf")] fn build_output_header_37(#[case] path: &str) -> Result<(), anyhow::Error> { - set_snapshot_suffix!("{}", path.split('/').last().unwrap()); + crate::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); let tmpdir = temp_testdir::TempDir::default(); let pedigree = PedigreeByName::from_path(path.replace(".vcf", ".ped")).unwrap(); @@ -311,7 +303,7 @@ mod test { #[case("tests/db/strucvars/ingest/melt-min.vcf")] #[case("tests/db/strucvars/ingest/popdel-min.vcf")] fn build_output_header_38(#[case] path: &str) -> Result<(), anyhow::Error> { - set_snapshot_suffix!("{}", 
path.split('/').last().unwrap()); + crate::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); let tmpdir = temp_testdir::TempDir::default(); let pedigree = PedigreeByName::from_path(path.replace(".vcf", ".ped")).unwrap(); From 501da08ece3597b198847b0d5439ce2df37189d3 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Fri, 6 Oct 2023 15:21:48 +0200 Subject: [PATCH 04/13] wip --- Cargo.lock | 2 - Cargo.toml | 3 +- README.md | 4 + src/common.rs | 64 ++----- src/db/mk_inhouse/cli.rs | 4 +- src/db/to_bin/clinvar/mod.rs | 7 +- src/db/to_bin/gene_region.rs | 3 +- src/db/to_bin/masked.rs | 3 +- src/db/to_bin/vardbs/mod.rs | 3 +- src/db/to_bin/xlink.rs | 3 +- src/seqvars/ingest/header.rs | 6 +- src/seqvars/ingest/mod.rs | 18 +- src/strucvars/ingest/header.rs | 32 ++-- src/strucvars/ingest/mod.rs | 177 ++++++++++++++++-- src/strucvars/query/genes.rs | 3 +- src/strucvars/query/mod.rs | 3 +- src/strucvars/query/pathogenic.rs | 3 +- src/strucvars/query/tads.rs | 3 +- .../{db => }/strucvars/ingest/delly2-min.ped | 0 .../{db => }/strucvars/ingest/delly2-min.vcf | 0 tests/strucvars/ingest/delly2-min.vcf.gz | Bin 0 -> 2324 bytes tests/strucvars/ingest/delly2-min.vcf.gz.tbi | Bin 0 -> 115 bytes .../strucvars/ingest/dragen-cnv-min.ped | 0 .../strucvars/ingest/dragen-cnv-min.vcf | 0 tests/strucvars/ingest/dragen-cnv-min.vcf.gz | Bin 0 -> 1544 bytes .../ingest/dragen-cnv-min.vcf.gz.tbi | Bin 0 -> 114 bytes .../strucvars/ingest/dragen-sv-min.ped | 0 .../strucvars/ingest/dragen-sv-min.vcf | 0 tests/strucvars/ingest/dragen-sv-min.vcf.gz | Bin 0 -> 2878 bytes .../strucvars/ingest/dragen-sv-min.vcf.gz.tbi | Bin 0 -> 116 bytes tests/{db => }/strucvars/ingest/gcnv-min.ped | 0 tests/{db => }/strucvars/ingest/gcnv-min.vcf | 0 tests/strucvars/ingest/gcnv-min.vcf.gz | Bin 0 -> 3225 bytes tests/strucvars/ingest/gcnv-min.vcf.gz.tbi | Bin 0 -> 126 bytes tests/{db => }/strucvars/ingest/manta-min.ped | 0 tests/{db => }/strucvars/ingest/manta-min.vcf | 0 
tests/strucvars/ingest/manta-min.vcf.gz | Bin 0 -> 2488 bytes tests/strucvars/ingest/manta-min.vcf.gz.tbi | Bin 0 -> 117 bytes tests/{db => }/strucvars/ingest/melt-min.ped | 0 tests/{db => }/strucvars/ingest/melt-min.vcf | 0 tests/strucvars/ingest/melt-min.vcf.gz | Bin 0 -> 2030 bytes tests/strucvars/ingest/melt-min.vcf.gz.tbi | Bin 0 -> 111 bytes .../{db => }/strucvars/ingest/popdel-min.ped | 0 .../{db => }/strucvars/ingest/popdel-min.vcf | 0 tests/strucvars/ingest/popdel-min.vcf.gz | Bin 0 -> 5374 bytes tests/strucvars/ingest/popdel-min.vcf.gz.tbi | Bin 0 -> 114 bytes 46 files changed, 222 insertions(+), 119 deletions(-) rename tests/{db => }/strucvars/ingest/delly2-min.ped (100%) rename tests/{db => }/strucvars/ingest/delly2-min.vcf (100%) create mode 100644 tests/strucvars/ingest/delly2-min.vcf.gz create mode 100644 tests/strucvars/ingest/delly2-min.vcf.gz.tbi rename tests/{db => }/strucvars/ingest/dragen-cnv-min.ped (100%) rename tests/{db => }/strucvars/ingest/dragen-cnv-min.vcf (100%) create mode 100644 tests/strucvars/ingest/dragen-cnv-min.vcf.gz create mode 100644 tests/strucvars/ingest/dragen-cnv-min.vcf.gz.tbi rename tests/{db => }/strucvars/ingest/dragen-sv-min.ped (100%) rename tests/{db => }/strucvars/ingest/dragen-sv-min.vcf (100%) create mode 100644 tests/strucvars/ingest/dragen-sv-min.vcf.gz create mode 100644 tests/strucvars/ingest/dragen-sv-min.vcf.gz.tbi rename tests/{db => }/strucvars/ingest/gcnv-min.ped (100%) rename tests/{db => }/strucvars/ingest/gcnv-min.vcf (100%) create mode 100644 tests/strucvars/ingest/gcnv-min.vcf.gz create mode 100644 tests/strucvars/ingest/gcnv-min.vcf.gz.tbi rename tests/{db => }/strucvars/ingest/manta-min.ped (100%) rename tests/{db => }/strucvars/ingest/manta-min.vcf (100%) create mode 100644 tests/strucvars/ingest/manta-min.vcf.gz create mode 100644 tests/strucvars/ingest/manta-min.vcf.gz.tbi rename tests/{db => }/strucvars/ingest/melt-min.ped (100%) rename tests/{db => }/strucvars/ingest/melt-min.vcf (100%) 
create mode 100644 tests/strucvars/ingest/melt-min.vcf.gz create mode 100644 tests/strucvars/ingest/melt-min.vcf.gz.tbi rename tests/{db => }/strucvars/ingest/popdel-min.ped (100%) rename tests/{db => }/strucvars/ingest/popdel-min.vcf (100%) create mode 100644 tests/strucvars/ingest/popdel-min.vcf.gz create mode 100644 tests/strucvars/ingest/popdel-min.vcf.gz.tbi diff --git a/Cargo.lock b/Cargo.lock index 9a5384b2..6d8ac9d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1796,8 +1796,6 @@ dependencies = [ [[package]] name = "mehari" version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a95119609578b3515e0b3720c1a21a7a38cd2303a7557f5c0771372fc3d1fe4c" dependencies = [ "actix-web", "annonars", diff --git a/Cargo.toml b/Cargo.toml index 7572b6a6..e3a595be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,8 @@ hgvs = "0.11" indexmap = { version = "2.0", features = ["serde"] } itertools = "0.11" log = "0.4" -mehari = "0.10" +#mehari = "0.11" +mehari = { path = "../mehari" } multimap = "0.9" procfs = "0.15" prost = "0.12" diff --git a/README.md b/README.md index 405bea95..20093a8c 100644 --- a/README.md +++ b/README.md @@ -222,6 +222,10 @@ Overall, the command will emit the following header rows in addition to the `##c ##x-varfish-version= ``` +> [!NOTE] +> The `strucvars ingest` step does not perform any annotation. +> It only merges the input VCF files from multiple callers (all files must have the same samples) and converts them into the internal format. + # Developer Information This section is only relevant for developers of `varfish-server-worker`. 
diff --git a/src/common.rs b/src/common.rs index fb413397..da42a195 100644 --- a/src/common.rs +++ b/src/common.rs @@ -2,7 +2,7 @@ use std::{ fs::File, - io::{BufRead, BufReader, BufWriter, Write}, + io::{BufRead, BufWriter, Write}, ops::Range, path::Path, }; @@ -10,7 +10,7 @@ use std::{ use byte_unit::Byte; use clap::Parser; use clap_verbosity_flag::{InfoLevel, Verbosity}; -use flate2::{bufread::MultiGzDecoder, write::GzEncoder, Compression}; +use flate2::{write::GzEncoder, Compression}; use hgvs::static_data::Assembly; use indexmap::IndexMap; use noodles_vcf as vcf; @@ -67,24 +67,6 @@ pub fn build_chrom_map() -> IndexMap { result } -/// Transparently open a file with gzip decoder. -pub fn open_read_maybe_gz

(path: P) -> Result, anyhow::Error> -where - P: AsRef, -{ - if path.as_ref().extension().map(|s| s.to_str()) == Some(Some("gz")) { - tracing::trace!("Opening {:?} as gzip for reading", path.as_ref()); - let file = File::open(path)?; - let bufreader = BufReader::new(file); - let decoder = MultiGzDecoder::new(bufreader); - Ok(Box::new(BufReader::new(decoder))) - } else { - tracing::trace!("Opening {:?} as plain text for reading", path.as_ref()); - let file = File::open(path).map(BufReader::new)?; - Ok(Box::new(BufReader::new(file))) - } -} - /// Transparently opena file with gzip encoder. pub fn open_write_maybe_gz

(path: P) -> Result, anyhow::Error> where @@ -370,16 +352,18 @@ pub fn add_contigs_38( } #[cfg(test)] -macro_rules! set_snapshot_suffix { - ($($expr:expr),*) => { - let mut settings = insta::Settings::clone_current(); - settings.set_snapshot_suffix(format!($($expr,)*)); - let _guard = settings.bind_to_scope(); - } -} +pub(crate) fn read_to_bytes

(path: P) -> Result, anyhow::Error> +where + P: AsRef, +{ + use std::io::Read; -#[cfg(test)] -pub(crate) use set_snapshot_suffix; + let mut f = std::fs::File::open(&path).expect("no file found"); + let metadata = std::fs::metadata(&path).expect("unable to read metadata"); + let mut buffer = vec![0; metadata.len() as usize]; + f.read_exact(&mut buffer).expect("buffer overflow"); + Ok(buffer) +} #[cfg(test)] mod test { @@ -400,7 +384,7 @@ mod test { #[case(true)] #[case(false)] fn open_write_maybe_gz(#[case] is_gzip: bool) -> Result<(), anyhow::Error> { - crate::common::set_snapshot_suffix!("{:?}", is_gzip); + mehari::common::set_snapshot_suffix!("{:?}", is_gzip); let filename = if is_gzip { "test.txt" } else { "test.txt.gz" }; let tmp_dir = temp_testdir::TempDir::default(); @@ -419,26 +403,6 @@ mod test { Ok(()) } - #[rstest::rstest] - #[case(true)] - #[case(false)] - fn open_read_maybe_gz(#[case] is_gzip: bool) -> Result<(), anyhow::Error> { - crate::common::set_snapshot_suffix!("{:?}", is_gzip); - - let mut f = super::open_read_maybe_gz(if is_gzip { - "tests/common/test.txt.gz" - } else { - "tests/common/test.txt" - })?; - - let mut buf = String::new(); - f.read_to_string(&mut buf)?; - - insta::assert_snapshot!(&buf); - - Ok(()) - } - #[rstest::rstest] #[case(0..10, 0..10, 1.0)] #[case(0..10, 5..15, 0.5)] diff --git a/src/db/mk_inhouse/cli.rs b/src/db/mk_inhouse/cli.rs index 425acf19..74491d72 100644 --- a/src/db/mk_inhouse/cli.rs +++ b/src/db/mk_inhouse/cli.rs @@ -10,6 +10,7 @@ use std::{ use bio::data_structures::interval_tree::IntervalTree; use clap::{command, Parser}; +use mehari::common::open_read_maybe_gz; use serde_json::to_writer; use serde_jsonlines::JsonLinesReader; use strum::IntoEnumIterator; @@ -17,8 +18,7 @@ use thousands::Separable; use crate::{ common::{ - build_chrom_map, open_read_maybe_gz, open_write_maybe_gz, read_lines, trace_rss_now, - GenomeRelease, CHROMS, + build_chrom_map, open_write_maybe_gz, read_lines, trace_rss_now, GenomeRelease, 
CHROMS, }, strucvars::query::schema::SvType, }; diff --git a/src/db/to_bin/clinvar/mod.rs b/src/db/to_bin/clinvar/mod.rs index 9d9671f6..11a21285 100644 --- a/src/db/to_bin/clinvar/mod.rs +++ b/src/db/to_bin/clinvar/mod.rs @@ -2,11 +2,12 @@ use std::{fs::File, io::BufRead, io::Write, path::Path, time::Instant}; +use mehari::common::open_read_maybe_gz; use prost::Message; use thousands::Separable; use crate::{ - common::{build_chrom_map, open_read_maybe_gz, trace_rss_now}, + common::{build_chrom_map, trace_rss_now}, strucvars::query::clinvar::pbs::{Pathogenicity, SvDatabase, SvRecord}, }; @@ -182,14 +183,14 @@ where #[cfg(test)] mod test { - use crate::common::open_read_maybe_gz; use crate::db::to_bin::clinvar::input::Assembly; + use mehari::common::open_read_maybe_gz; #[rstest::rstest] #[case(Assembly::Grch37)] #[case(Assembly::Grch38)] fn run_convert_jsonl_to_protobuf(#[case] assembly: Assembly) -> Result<(), anyhow::Error> { - crate::common::set_snapshot_suffix!("{:?}", assembly); + mehari::common::set_snapshot_suffix!("{:?}", assembly); let reader = open_read_maybe_gz( "tests/db/to-bin/varfish-db-downloader/vardbs/clinvar/clinvar-svs.jsonl", )?; diff --git a/src/db/to_bin/gene_region.rs b/src/db/to_bin/gene_region.rs index 2d17a7af..b5f1fcd0 100644 --- a/src/db/to_bin/gene_region.rs +++ b/src/db/to_bin/gene_region.rs @@ -2,11 +2,12 @@ use std::{fs::File, io::Write, path::Path, time::Instant}; +use mehari::common::open_read_maybe_gz; use prost::Message; use thousands::Separable; use crate::{ - common::{build_chrom_map, numeric_gene_id, open_read_maybe_gz, trace_rss_now}, + common::{build_chrom_map, numeric_gene_id, trace_rss_now}, db::pbs::{GeneRegionDatabase, GeneRegionRecord}, }; diff --git a/src/db/to_bin/masked.rs b/src/db/to_bin/masked.rs index 6410c60b..f2fdeae5 100644 --- a/src/db/to_bin/masked.rs +++ b/src/db/to_bin/masked.rs @@ -2,11 +2,12 @@ use std::{fs::File, io::Write, path::Path, time::Instant}; +use mehari::common::open_read_maybe_gz; use 
prost::Message; use thousands::Separable; use crate::{ - common::{build_chrom_map, open_read_maybe_gz, trace_rss_now}, + common::{build_chrom_map, trace_rss_now}, db::pbs::{MaskedDatabase, MaskedDbRecord}, }; diff --git a/src/db/to_bin/vardbs/mod.rs b/src/db/to_bin/vardbs/mod.rs index 102e4dcb..a99e27db 100644 --- a/src/db/to_bin/vardbs/mod.rs +++ b/src/db/to_bin/vardbs/mod.rs @@ -6,10 +6,11 @@ use std::path::Path; use std::time::Instant; use anyhow::anyhow; +use mehari::common::open_read_maybe_gz; use prost::Message; use thousands::Separable; -use crate::common::{build_chrom_map, open_read_maybe_gz, trace_rss_now}; +use crate::common::{build_chrom_map, trace_rss_now}; use crate::db; use crate::db::mk_inhouse::output::Record as InhouseDbRecord; use crate::db::pbs::{BackgroundDatabase, BgDbRecord}; diff --git a/src/db/to_bin/xlink.rs b/src/db/to_bin/xlink.rs index fd78f442..75b91c3f 100644 --- a/src/db/to_bin/xlink.rs +++ b/src/db/to_bin/xlink.rs @@ -2,11 +2,12 @@ use std::{fs::File, io::Write, path::Path, time::Instant}; +use mehari::common::open_read_maybe_gz; use prost::Message; use thousands::Separable; use crate::{ - common::{numeric_gene_id, open_read_maybe_gz, trace_rss_now}, + common::{numeric_gene_id, trace_rss_now}, db::pbs::{XlinkDatabase, XlinkRecord}, }; diff --git a/src/seqvars/ingest/header.rs b/src/seqvars/ingest/header.rs index 78ed64e5..4c978ef0 100644 --- a/src/seqvars/ingest/header.rs +++ b/src/seqvars/ingest/header.rs @@ -427,7 +427,7 @@ mod test { #[case("tests/seqvars/ingest/example_gatk_hc.3.7-0.vcf")] #[case("tests/seqvars/ingest/example_gatk_hc.4.4.0.0.vcf")] fn variant_caller_guess(#[case] path: &str) -> Result<(), anyhow::Error> { - crate::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); + mehari::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); let vcf_header = noodles_vcf::reader::Builder .build_from_path(path)? 
@@ -444,7 +444,7 @@ mod test { #[case("tests/seqvars/ingest/example_gatk_hc.3.7-0.vcf")] #[case("tests/seqvars/ingest/example_gatk_hc.4.4.0.0.vcf")] fn build_output_header_37(#[case] path: &str) -> Result<(), anyhow::Error> { - crate::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); + mehari::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); let tmpdir = temp_testdir::TempDir::default(); let pedigree = PedigreeByName::from_path(path.replace(".vcf", ".ped")).unwrap(); @@ -477,7 +477,7 @@ mod test { #[case("tests/seqvars/ingest/example_gatk_hc.3.7-0.vcf")] #[case("tests/seqvars/ingest/example_gatk_hc.4.4.0.0.vcf")] fn build_output_header_38(#[case] path: &str) -> Result<(), anyhow::Error> { - crate::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); + mehari::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); let tmpdir = temp_testdir::TempDir::default(); let pedigree = PedigreeByName::from_path(path.replace(".vcf", ".ped")).unwrap(); diff --git a/src/seqvars/ingest/mod.rs b/src/seqvars/ingest/mod.rs index 0e69fa3b..1bbcd512 100644 --- a/src/seqvars/ingest/mod.rs +++ b/src/seqvars/ingest/mod.rs @@ -2,8 +2,9 @@ use std::sync::{Arc, OnceLock}; -use crate::common::{self, open_read_maybe_gz, open_write_maybe_gz, worker_version, GenomeRelease}; +use crate::common::{self, open_write_maybe_gz, worker_version, GenomeRelease}; use mehari::annotate::seqvars::provider::MehariProvider; +use mehari::common::open_read_maybe_gz; use noodles_vcf as vcf; use thousands::Separable; @@ -480,7 +481,7 @@ mod test { #[case("tests/seqvars/ingest/NA12878_dragen.vcf")] #[case("tests/seqvars/ingest/Case_1.vcf")] fn result_snapshot_test(#[case] path: &str) -> Result<(), anyhow::Error> { - crate::common::set_snapshot_suffix!( + mehari::common::set_snapshot_suffix!( "{}", path.split('/').last().unwrap().replace('.', "_") ); @@ -507,17 +508,6 @@ mod test { Ok(()) } - fn read_to_bytes

(path: P) -> Result, anyhow::Error> - where - P: AsRef, - { - let mut f = std::fs::File::open(&path).expect("no file found"); - let metadata = std::fs::metadata(&path).expect("unable to read metadata"); - let mut buffer = vec![0; metadata.len() as usize]; - f.read_exact(&mut buffer).expect("buffer overflow"); - Ok(buffer) - } - #[test] fn result_snapshot_test_gz() -> Result<(), anyhow::Error> { let tmpdir = temp_testdir::TempDir::default(); @@ -541,7 +531,7 @@ mod test { super::run(&args_common, &args)?; let mut buffer = Vec::new(); - hxdmp::hexdump(&read_to_bytes(&args.path_out)?, &mut buffer)?; + hxdmp::hexdump(&crate::common::read_to_bytes(&args.path_out)?, &mut buffer)?; insta::assert_snapshot!(String::from_utf8_lossy(&buffer)); Ok(()) diff --git a/src/strucvars/ingest/header.rs b/src/strucvars/ingest/header.rs index 06d3cae2..6c1b40bb 100644 --- a/src/strucvars/ingest/header.rs +++ b/src/strucvars/ingest/header.rs @@ -255,16 +255,16 @@ mod test { use rstest::rstest; #[rstest] - #[case("tests/db/strucvars/ingest/delly2-min.vcf")] - #[case("tests/db/strucvars/ingest/dragen-cnv-min.vcf")] - #[case("tests/db/strucvars/ingest/dragen-sv-min.vcf")] - #[case("tests/db/strucvars/ingest/gcnv-min.vcf")] - #[case("tests/db/strucvars/ingest/manta-min.vcf")] - #[case("tests/db/strucvars/ingest/melt-min.vcf")] - #[case("tests/db/strucvars/ingest/popdel-min.vcf")] + #[case("tests/strucvars/ingest/delly2-min.vcf")] + #[case("tests/strucvars/ingest/dragen-cnv-min.vcf")] + #[case("tests/strucvars/ingest/dragen-sv-min.vcf")] + #[case("tests/strucvars/ingest/gcnv-min.vcf")] + #[case("tests/strucvars/ingest/manta-min.vcf")] + #[case("tests/strucvars/ingest/melt-min.vcf")] + #[case("tests/strucvars/ingest/popdel-min.vcf")] fn build_output_header_37(#[case] path: &str) -> Result<(), anyhow::Error> { - crate::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); + mehari::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); let tmpdir = 
temp_testdir::TempDir::default(); let pedigree = PedigreeByName::from_path(path.replace(".vcf", ".ped")).unwrap(); @@ -295,15 +295,15 @@ mod test { } #[rstest] - #[case("tests/db/strucvars/ingest/delly2-min.vcf")] - #[case("tests/db/strucvars/ingest/dragen-cnv-min.vcf")] - #[case("tests/db/strucvars/ingest/dragen-sv-min.vcf")] - #[case("tests/db/strucvars/ingest/gcnv-min.vcf")] - #[case("tests/db/strucvars/ingest/manta-min.vcf")] - #[case("tests/db/strucvars/ingest/melt-min.vcf")] - #[case("tests/db/strucvars/ingest/popdel-min.vcf")] + #[case("tests/strucvars/ingest/delly2-min.vcf")] + #[case("tests/strucvars/ingest/dragen-cnv-min.vcf")] + #[case("tests/strucvars/ingest/dragen-sv-min.vcf")] + #[case("tests/strucvars/ingest/gcnv-min.vcf")] + #[case("tests/strucvars/ingest/manta-min.vcf")] + #[case("tests/strucvars/ingest/melt-min.vcf")] + #[case("tests/strucvars/ingest/popdel-min.vcf")] fn build_output_header_38(#[case] path: &str) -> Result<(), anyhow::Error> { - crate::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); + mehari::common::set_snapshot_suffix!("{}", path.split('/').last().unwrap()); let tmpdir = temp_testdir::TempDir::default(); let pedigree = PedigreeByName::from_path(path.replace(".vcf", ".ped")).unwrap(); diff --git a/src/strucvars/ingest/mod.rs b/src/strucvars/ingest/mod.rs index d63c4356..0213ffd6 100644 --- a/src/strucvars/ingest/mod.rs +++ b/src/strucvars/ingest/mod.rs @@ -1,8 +1,9 @@ //! Implementation of `strucvars ingest` subcommand. 
-use crate::common::{self, open_read_maybe_gz, open_write_maybe_gz, worker_version, GenomeRelease}; +use crate::common::{self, open_write_maybe_gz, worker_version, GenomeRelease}; +use mehari::common::open_read_maybe_gz; -use mehari::annotate::seqvars::provider::MehariProvider; +use mehari::annotate::{seqvars::provider::MehariProvider, strucvars::guess_sv_caller}; use noodles_vcf as vcf; use thousands::Separable; @@ -15,18 +16,15 @@ pub struct Args { /// Maximal number of variants to write out; optional. #[clap(long)] pub max_var_count: Option, - /// The path to the mehari database. - #[clap(long)] - pub path_mehari_db: String, /// The assumed genome build. #[clap(long)] pub genomebuild: GenomeRelease, /// Path to the pedigree file. #[clap(long)] pub path_ped: String, - /// Path to input file. - #[clap(long)] - pub path_in: String, + /// Path to input files. + #[clap(long, required = true)] + pub path_in: Vec, /// Path to output file. #[clap(long)] pub path_out: String, @@ -46,19 +44,48 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow: tracing::info!("pedigre = {:#?}", &pedigree); tracing::info!("opening input file..."); - let mut input_reader = { - vcf::reader::Builder - .build_from_reader(open_read_maybe_gz(&args.path_in)?) - .map_err(|e| anyhow::anyhow!("could not build VCF reader: {}", e))? - }; + let mut input_readers = args + .path_in + .iter() + .map(|path_in| { + vcf::reader::Builder + .build_from_reader(open_read_maybe_gz(path_in)?) 
+ .map_err(|e| anyhow::anyhow!("could not build VCF reader: {}", e)) + }) + .collect::, _>>()?; + + tracing::info!("guessing SV callers..."); + let input_sv_callers = args + .path_in + .iter() + .map(|path_in| guess_sv_caller(path_in)) + .collect::, _>>()?; tracing::info!("processing header..."); - let input_header = input_reader - .read_header() - .map_err(|e| anyhow::anyhow!("problem reading VCF header: {}", e))?; + let input_headers = input_readers + .iter_mut() + .map(|input_reader| { + input_reader + .read_header() + .map_err(|e| anyhow::anyhow!("problem reading VCF header: {}", e)) + }) + .collect::, _>>()?; + let sample_names = input_headers + .first() + .expect("must have at least one input file") + .sample_names(); + for (indexno, other_input_header) in input_headers.iter().enumerate().skip(1) { + if other_input_header.sample_names() != sample_names { + return Err(anyhow::anyhow!( + "input file #{} has different sample names than first one: {}", + indexno, + &args.path_in[indexno] + )); + } + } let output_header = header::build_output_header( - input_header.sample_names(), - &vec![], + sample_names, + &input_sv_callers.iter().collect::>(), &Some(pedigree), args.genomebuild, worker_version(), @@ -72,7 +99,7 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow: // process_variants( // &mut output_writer, - // &mut input_reader, + // &mut input_readers, // &output_header, // &input_header, // args, @@ -86,4 +113,114 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow: } #[cfg(test)] -mod test {} +mod test { + use crate::common::GenomeRelease; + + #[test] + fn smoke_test_trio() -> Result<(), anyhow::Error> { + let tmpdir = temp_testdir::TempDir::default(); + + let args_common = Default::default(); + let args = super::Args { + max_var_count: None, + path_in: vec![ + String::from("tests/strucvars/ingest/delly2-min.vcf"), + String::from("tests/strucvars/ingest/popdel-min.vcf"), + ], + path_ped: 
"tests/strucvars/ingest/delly2-min.ped".into(), + genomebuild: GenomeRelease::Grch37, + path_out: tmpdir + .join("out.vcf") + .to_str() + .expect("invalid path") + .into(), + }; + super::run(&args_common, &args)?; + + insta::assert_snapshot!(std::fs::read_to_string(&args.path_out)?); + + Ok(()) + } + #[test] + fn smoke_test_singleton() -> Result<(), anyhow::Error> { + let tmpdir = temp_testdir::TempDir::default(); + + let args_common = Default::default(); + let args = super::Args { + max_var_count: None, + path_in: vec![ + String::from("tests/strucvars/ingest/dragen-cnv-min.vcf"), + String::from("tests/strucvars/ingest/dragen-sv-min.vcf"), + String::from("tests/strucvars/ingest/gcnv-min.vcf"), + String::from("tests/strucvars/ingest/manta-min.vcf"), + String::from("tests/strucvars/ingest/melt-min.vcf"), + ], + path_ped: "tests/strucvars/ingest/dragen-cnv-min.ped".into(), + genomebuild: GenomeRelease::Grch37, + path_out: tmpdir + .join("out.vcf") + .to_str() + .expect("invalid path") + .into(), + }; + super::run(&args_common, &args)?; + + insta::assert_snapshot!(std::fs::read_to_string(&args.path_out)?); + + Ok(()) + } + + #[test] + fn smoke_test_trio_gz() -> Result<(), anyhow::Error> { + let tmpdir = temp_testdir::TempDir::default(); + + let args_common = Default::default(); + let args = super::Args { + max_var_count: None, + path_in: vec![ + String::from("tests/strucvars/ingest/delly2-min.vcf.gz"), + String::from("tests/strucvars/ingest/popdel-min.vcf.gz"), + ], + path_ped: "tests/strucvars/ingest/delly2-min.ped".into(), + genomebuild: GenomeRelease::Grch37, + path_out: tmpdir + .join("out.vcf.gz") + .to_str() + .expect("invalid path") + .into(), + }; + super::run(&args_common, &args)?; + + insta::assert_snapshot!(std::fs::read_to_string(&args.path_out)?); + + Ok(()) + } + #[test] + fn smoke_test_singleton_gz() -> Result<(), anyhow::Error> { + let tmpdir = temp_testdir::TempDir::default(); + + let args_common = Default::default(); + let args = super::Args { + 
max_var_count: None, + path_in: vec![ + String::from("tests/strucvars/ingest/dragen-cnv-min.vcf.gz"), + String::from("tests/strucvars/ingest/dragen-sv-min.vcf.gz"), + String::from("tests/strucvars/ingest/gcnv-min.vcf.gz"), + String::from("tests/strucvars/ingest/manta-min.vcf.gz"), + String::from("tests/strucvars/ingest/melt-min.vcf.gz"), + ], + path_ped: "tests/strucvars/ingest/dragen-cnv-min.ped".into(), + genomebuild: GenomeRelease::Grch37, + path_out: tmpdir + .join("out.vcf.gz") + .to_str() + .expect("invalid path") + .into(), + }; + super::run(&args_common, &args)?; + + insta::assert_snapshot!(std::fs::read_to_string(&args.path_out)?); + + Ok(()) + } +} diff --git a/src/strucvars/query/genes.rs b/src/strucvars/query/genes.rs index 6e120dac..11509002 100644 --- a/src/strucvars/query/genes.rs +++ b/src/strucvars/query/genes.rs @@ -3,12 +3,13 @@ use std::{collections::HashSet, ops::Range, path::Path, time::Instant}; use bio::data_structures::interval_tree::ArrayBackedIntervalTree; +use mehari::common::open_read_maybe_gz; use prost::Message; use serde::Deserialize; use tracing::info; use crate::{ - common::{open_read_maybe_gz, CHROMS}, + common::CHROMS, db::{ conf::{Database, GenomeRelease}, pbs, diff --git a/src/strucvars/query/mod.rs b/src/strucvars/query/mod.rs index 10e8ad8a..fbc6cea5 100644 --- a/src/strucvars/query/mod.rs +++ b/src/strucvars/query/mod.rs @@ -23,6 +23,7 @@ use indexmap::IndexMap; use log::warn; use mehari::{ annotate::seqvars::provider::TxIntervalTrees, + common::open_read_maybe_gz, db::create::txs::data::{Strand, Transcript, TxSeqDatabase}, }; use serde::Serialize; @@ -30,7 +31,7 @@ use thousands::Separable; use uuid::Uuid; use crate::{ - common::{build_chrom_map, numeric_gene_id, open_read_maybe_gz, trace_rss_now}, + common::{build_chrom_map, numeric_gene_id, trace_rss_now}, db::conf::{Database, GenomeRelease, TadSet as TadSetChoice}, strucvars::query::{ interpreter::QueryInterpreter, pathogenic::Record as KnownPathogenicRecord, diff --git 
a/src/strucvars/query/pathogenic.rs b/src/strucvars/query/pathogenic.rs index 90becddf..ef402574 100644 --- a/src/strucvars/query/pathogenic.rs +++ b/src/strucvars/query/pathogenic.rs @@ -4,11 +4,12 @@ use std::path::Path; use bio::data_structures::interval_tree::ArrayBackedIntervalTree; use indexmap::IndexMap; +use mehari::common::open_read_maybe_gz; use serde::Serialize; use tracing::{info, warn}; use crate::{ - common::{build_chrom_map, open_read_maybe_gz, CHROMS}, + common::{build_chrom_map, CHROMS}, db::conf::GenomeRelease, }; diff --git a/src/strucvars/query/tads.rs b/src/strucvars/query/tads.rs index ec104540..d44d397c 100644 --- a/src/strucvars/query/tads.rs +++ b/src/strucvars/query/tads.rs @@ -4,10 +4,11 @@ use std::path::Path; use bio::data_structures::interval_tree::ArrayBackedIntervalTree; use indexmap::IndexMap; +use mehari::common::open_read_maybe_gz; use tracing::info; use crate::{ - common::{build_chrom_map, open_read_maybe_gz, CHROMS}, + common::{build_chrom_map, CHROMS}, db::conf::{GenomeRelease, TadSet as TadSetChoice}, }; diff --git a/tests/db/strucvars/ingest/delly2-min.ped b/tests/strucvars/ingest/delly2-min.ped similarity index 100% rename from tests/db/strucvars/ingest/delly2-min.ped rename to tests/strucvars/ingest/delly2-min.ped diff --git a/tests/db/strucvars/ingest/delly2-min.vcf b/tests/strucvars/ingest/delly2-min.vcf similarity index 100% rename from tests/db/strucvars/ingest/delly2-min.vcf rename to tests/strucvars/ingest/delly2-min.vcf diff --git a/tests/strucvars/ingest/delly2-min.vcf.gz b/tests/strucvars/ingest/delly2-min.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..98f68dd0371762df38043a24db2b7d7b800fd027 GIT binary patch literal 2324 zcmV+v3G4PBiwFb&00000{{{d;LjnNz2(_7QbE3!^hCiEM(Y5(_qV_A$xNH@R#z~bi z2AS-cPg4Y1Wfp-2Og8)LPcAI8?gUM}y(q6qRRE2A{AezM~jtfIISc>yxNC`9^+a&X1$=1Ncb@qNLcZR#6a%qy))s`zmsf zuE1haRF{`-F{p$e=J!vPtn)CLkMHIcY2iEXBNNs`Qi`?68=)p3NRw3*iX;#qN=lKx 
z`7!YGbeDu+G9Nc}7f;I{*Ihg<8@iude3V6d*=3nW$@*N?+2rXqEz+$3t2CD~7zAmW zhf(5}G7Jd(Ldewk$v$X5I5zQPP+kmJZ9qyj47B!ZDw1>?1t3d{NPbiKknVpZxH4A% zFZLt}PjS-~C^rHWWxfl_UGB%=&Cer0DI5KoT%)}3qg;f2k%XYwWm%e+*Xlpoi|1=1 zJc=;#6R`EOEK1gOai=Sg)v=3n>81PQ;zbrmWj`1GrT8_%%ewh7FHM!Ezxz1#%TAFX zO$w0|y8`%ev`)5iIv?CN=EKu`Ihij%FRz#st@?%dV<)FxLw53gyUv9M5J{P*S<|9- zg*UaZqIKezyIfAwa2JTmZ6boIn#+%@VRiX3o6N5nqk3rtu}IeCrkat<7w^krBG=)@ z!uasGa3{{gvQ7j%CcGP?RDl!Sbb%)A{nsv0CIJjDk{x$FVxBGSS)gSPOr8di>kM z+bGD>O}b6vbp3G*vn%Af;DfFI=KzQ>DnS?(r7vqcT>-q`l%L#3d%t~8uPJT06Vzp^ zHl7_DPdn$>EL|hmF9JUnAy^-TKpg!p;%JkmVNn&a>)yHcoo&}daS`jP>F^&;y+@yMY@h_4Of0q4zoP{CW5jU2D{g&9AY?tY*>_j83kWMzx0Q7gW=S5Hbft8 z3ZjSX@G@BWH@CO4Y@&7L7v1fbKnqij8PqWPju2u{(;I+Z1B@eLXj;>xQ3F&X8ig1)0Db}xMud`P9uTYv z!Wf}CZJIzxAt2NkYIYRi6G04y49=RwDVvBPjT^erBu?psI-#6V*d*AAz>FZmo1H~< zLOCRi>WnmjI-v$aoMChkqfV%9KwZc5!e*UN#t}BS(F}p>d};(@#&9zPuG2{kg!yHz zaGgzt2&7DFru)CAs$f`$?4qu|oIDbpTnapTwSvYp-ncrQK?pI_2PdOg!>th3P7q=c z8toKUD9#aSr3gAvSSNQ&90mG(_9`NOhH>xZM|3 zBWfB$Z4~0FmX37R?u)1tg)$Ip?<3_Z#UO6?b*9BphnAsGT<;i1N>z&E_97%_ zTAXy0DppDrBG^%1RHYa}?bMl;#@fdbY6^uQY}cYtr8w#ErJYk9Sw#wkQHL)|6p9eq z;Y;l!VYtJW>Yix4BVXz=!HkO?>_Dl$81L`}l@T%!xmcVJRjAYihtTMl9V&%2gSTrb zSw&Ez9YchbP9kiOj+Fu%D#j6R?EpqfHxbcw)Uj%+-WVgiJ++Xl7)Kq;Ia0l82+?*m z49_s5eI>w%s2IhhBdFR1O}pHP4x^xULEam*rx&TCpkqWkf@)_N<{fJ63}dvTj?OU3 zjP}7oY8P~lI#y#$&oGU5)SH@K15(D?^QL+;u)#aFOsry>!8*J(Fs);fLKV{?>F{>e z1+I5ZKs6y8(T?iHN>4FDNGI)TN*5T_=|w{3&Q}O^4YJc9Amd{F+yA6EBJxI3=d^-e zgH@TPaq;yn67MhjXC)RI$Xfc(rYz#jgM@#qFLZkYa+HyR{o=&IU)bpI_bZUFHZBOOj{zCQ6 zwx^!$$@h-D?m6DnlV5wD?Rm0^=XjpuRD^Bsnd-CSc(&u%o+H1lT06Gw*^cLV`zupN zN;nnZ$d~GGA67Mt^~~waoIacPp6NO>({)VO zaZP#9Vvb#N{9?MU>Av*f5Se{8;E-Yc4&L<<9bm3glW|jyl8GTPiB76lY(f*7x=Ht~ z@BkA=Sk(#*2-lE_@xB!@43l7FA_5PsG!ySzp#da_RE_vR=g36RJUluy3D!-9n2D)= uXoir9Z~g};XNc~M9RL6yiwFb&00000{{{d;LjnLB00RI3000000000z1cI;t literal 0 HcmV?d00001 diff --git a/tests/strucvars/ingest/delly2-min.vcf.gz.tbi b/tests/strucvars/ingest/delly2-min.vcf.gz.tbi new file mode 100644 index 
0000000000000000000000000000000000000000..dedf2b5016c5e8e7d1736e0df6b74d1751b28f96 GIT binary patch literal 115 zcmb2|=3rp}f&Xj_PR>jWVGP`bpHfm%5)u-U5)v9N@&Li9fLWc5jRysMo^d#a-0(7X wILOb(1_bLp1wvmu>fu@8!0LHcqjzo_BZK{|{>*8N3=HyU#!53VgKYs30F)XY0{{R3 literal 0 HcmV?d00001 diff --git a/tests/db/strucvars/ingest/dragen-cnv-min.ped b/tests/strucvars/ingest/dragen-cnv-min.ped similarity index 100% rename from tests/db/strucvars/ingest/dragen-cnv-min.ped rename to tests/strucvars/ingest/dragen-cnv-min.ped diff --git a/tests/db/strucvars/ingest/dragen-cnv-min.vcf b/tests/strucvars/ingest/dragen-cnv-min.vcf similarity index 100% rename from tests/db/strucvars/ingest/dragen-cnv-min.vcf rename to tests/strucvars/ingest/dragen-cnv-min.vcf diff --git a/tests/strucvars/ingest/dragen-cnv-min.vcf.gz b/tests/strucvars/ingest/dragen-cnv-min.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..249f90c5a0b4d26a6a4ebc105ce652cb0980f92a GIT binary patch literal 1544 zcmV+j2KV_NiwFb&00000{{{d;LjnNn1-)0za@#f+UCU2_v6r7o0^q+=Wk!)`#oy@1 z6&0slaUc?sFd;z(1nD|Y-vKF8663mV7hTx`&OPVi0QUmCx!JLl?sAb4IeMJj)s~L0 zZf>UQ@hn(AQc<#;kAAtIjuJukl;6G#U58(X0J=KFNO!QMn>vEJbqhXU3}1ijxF&g) z5uVH$r{{cABI)RQk#n#h0$>D?KSX$FnP3uZ0EQS}-=2l83dwR_j;=LL(4Cer0gO_T zJ>z@hP-TQ0g6^oGJf_B>G~L8L{?T_t{}y4S?;_;rS%fSfXxfgH^1G%SK)}ejG|FrX zk#?vuV>t`g#$!KYOA|;)W zG%bzgSVP!zy|1%Sn^B%;TFHLUC`8d3MGzvr(bxAsE>gk&aVofsy-q6kX&(mKp+aUPtXOu&dHkE#h@0{F>6bnJc5fZG9VADN!x^po^D zfaREu1u^ac{1X6SWLjq52LwBWY1^jdTKyytIzvF{*r>lygii?5vytc9eZ^9V9&}x7{gh^B zsO=-{`CeZFcYZo1#I}w561cO|@(}ia_6m1y+Q@{K?ey*b?Q|+^>_WReSL6AnnVl>j zZ>O}31#2$w(e)%Ro&m42EfwI{V?kTh*XME5V15}lr73M%pP(s`tfoNbKprUQYVtDv z%fn@S+lN5D12m;Iku(AESt@U{-Y@S~4edI(n+MC=WfL?)x0~mJj?~4wr{Yqtg($ss za0iN9UCcQz0poF6B{Tty19@O25H!w3aw!u&o~h_>s7%@Jn5W`vHv(*_e4>;$!h|Pa zJl_Cyi>1ddcP6v>{UPYBCX|Bl{iTeWP)jMLsA5?OlD_QSC1J3fzQ>z@C!HgTRvG?s z7Lo?I{TqWO_YbS^MGC(mHBonRax_R%QIix9kyktc538_GdY1pcBn_6+hV<@!z6sV1 zZOrR=voSrAN2%H=WnS0q)Pza`cbpHP72H{;-ch6bRHl_1iO 
zt5~fUlii(c?uniF{$tG+UbIM3^NGE2J8)KW7_c%OLku3iw7 zl4bpt)2I40`DbmV*_5V6=y?M@V?32MeitLl3XvC7q|cxrOwa^So`CknO(ZBK5wdA0 zrOXSk6_k7{a>ixbRVK%0))&syS0$6rBdD9E=^%J@Gx@w;Ed~#(aBx2ztb@D3c)l6@ z`ei&Hw5MjEig3{8J_yH)hk0;?1_*nmXIX>xLvlD@h2anl+fiG69$0F2IrwE7%s&n= z9CW2NXgcUfQ|0ubZFA}>`VcJD`=E*Y(Dw3?f(&MxVYnDhmcvhz;X^Qh1{y-$#K?!f uQXHaNboD2EbW!m#4*&okiwFb&00000{{{d;LjnLB00RI3000000001fY3EG< literal 0 HcmV?d00001 diff --git a/tests/strucvars/ingest/dragen-cnv-min.vcf.gz.tbi b/tests/strucvars/ingest/dragen-cnv-min.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..2b58902a515b0f2f1e4b65a4bbce4246a9f910bf GIT binary patch literal 114 zcmb2|=3rp}f&Xj_PR>jWp$y!GpHfm%5)u-U5)v9N@&Li9fLWc5jRysMs(hP1h#02id)lj;o0LX0r97Ym zdH-#{-xnEKF}~`rcN@a{<$8ZDR5R2M|Meo$`>@2Q7cG&|+bof0zq+{z5}vWu;HSxO zfVwf|t86{MnvHd6VD$4j0G|LVvTfB=YXJ2KK(+-j)>K#nXeR(dx`s`o2GC1@1#P5b zqi(?{0VYC7hlW`Lm?c2Bk!l%6U8Ge4G($Bsh;a>Ip8yCWRa5J6K(Iupx~^)bRu_R# z1p%RWtz&e-Q3lksplM?5LS`9I-A355tvUrR<7uc6>pHGe;4++=g|K}YD_llX zM=I2GqaNX7S@_kdIhCvW-|&_bg2f?ntjyFDoVr6 zy0j{zVd$s{h3u*=6PbFmF13o%>=s={1!W<<*+Zin7%fUB)@8C90@` z!l*@;rdCj@sVjch4x&RAsxHX0L34OfH47$2S_`Rh z2E%rXHT?{xYb|wj2Gew_Ia#Rcf@!0cZj7}vm|?fnTQ$5E)O5W$ZdGj-w(ORXiK{Tf z(p$9EU`ER(g;kgd)fR1MF4$(<2CN3eMp{et;){#QI=93k=`Y1n#}GVa>!&lUyR&|H|oxIn`fF1VAp@y(!pI!R^1^LLCUrn zOZec2$!43;^|KZH4_vHUEFHNGtq?e_P*Nn;qkVF?Ic~&=g zKbjrw^$L3vp3xOe&kYV)ba>7JjDx~+@JzFhlya~b-2uWw;N1Iw#4(MjIHwuShBeWR zBqJVzZ6cWbNvhA*^e7vIIq;c=l0REB0a6+yX?RZLPUa8ZkqUo-%1wC0!h=54ZnUER8;y=;!@AM2vp5=c+B2Es0EyX(Zz#_|l%#-?U=2u|(Ug-+a)mTL z1pb_4>iDC{u$JE@mdQ}Ydn2tqr<8o4ycXEe(`e>jQFw(Rx|dRk5J|vDMnjM#Eq32M zOs}lK`=aKO2s{%(!IrQT$Y5tIATd}cn(%`WJOA6{{W2Rahoiaw?wUclpda8$I3Prf#v$0I$)1H&*f~B9bnUOb+@3LVz{d%i6eQSQ#XJ(rI;McwDI;4g<*e7|#3#`EEH=2ku?M3Be|7a*oW6n3?xmiRqq}ZNI^X@nfxzKQ`&t!Rx|A=>f@(yg9>L@V)N6OEwD{ zor{sjrOz*`WG5=epg?-~H*yN=&VA8shof4WhBT(-U^t>CGq34nTZawb&u@KaHXKdQ z(LUiJ-GJfYoRD^yP=;$XYrOLDO79WNJ(2o%!qdX*m7;kA4o3V~5N4CRFr)A%eOj zjzxdwC7o;OVXE9 
zh>MlKQBk4#ZS3_=2BmY#Hl-2!1kORrGOLU6WC(s6d-=ECw1xwU!gm`bTNiCgGk~H{ zPM%|`Y{*v0U($dq-O+~Y2NkvD@&Q5Fko1GxROjo5-m1vWBAaQk_*b9=X(EIo-$^R_ zAOihloAqhPf}|hFmr$#Iv5l5FZYgnO!+2iL%aj(scCj2U+_h@<_CZuc{y-Y={gEl) z`{PwX<4EC$gIzJ#&$WtuRi8XYkQG%@x?%}eP<^93)3EOjJ!Owp1s*N6w>8(PTUF;i^-dK|7u^o!^gMp$^h*OG9dBN_a+wqx)NA zLk-`;FDh$BtOo1|4~)RiXvv6EkR9cS$1*L>g4ZOK1=@x^ug1L@e648GLSaXO}fJMzJ& zGn&cY>98%`f^UmA-*N=J&EMDz6l9oh^OvIoG)0Cve4D@f9l$QK-WIPR2E}*Y=y7oA zeP`_7-ubuV$J_b+?R;_TEjp-=ZYA-puHITm@9IVu-oh@H|KO|t1Lj6r_0}W+03VA8 c1ONa4009360763o02=@U000000000007&15lK=n! literal 0 HcmV?d00001 diff --git a/tests/strucvars/ingest/dragen-sv-min.vcf.gz.tbi b/tests/strucvars/ingest/dragen-sv-min.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..91f491df22e662ae6d52582852fa3dfc9dc5f0cd GIT binary patch literal 116 zcmb2|=3rp}f&Xj_PR>jW;SAh`pHfm%5)u-U5)v9N@&Li9fLWc5jRysM)(JJO&}i{A x)F?DIFfgd_ZCc?Zo}S{s>gkc?_A;<)iW>vNF^h16Hf9C}c{FpS8GwRN005R<9aI1S literal 0 HcmV?d00001 diff --git a/tests/db/strucvars/ingest/gcnv-min.ped b/tests/strucvars/ingest/gcnv-min.ped similarity index 100% rename from tests/db/strucvars/ingest/gcnv-min.ped rename to tests/strucvars/ingest/gcnv-min.ped diff --git a/tests/db/strucvars/ingest/gcnv-min.vcf b/tests/strucvars/ingest/gcnv-min.vcf similarity index 100% rename from tests/db/strucvars/ingest/gcnv-min.vcf rename to tests/strucvars/ingest/gcnv-min.vcf diff --git a/tests/strucvars/ingest/gcnv-min.vcf.gz b/tests/strucvars/ingest/gcnv-min.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..8a5a8806903b9520b8aebda3293086c2575362af GIT binary patch literal 3225 zcmV;K3}*8miwFb&00000{{{d;LjnMN4DDTAbK5o+eO7)2CVAPNF=Tx3(_ME)mF3jC zjvPm7+CFJu5|Y?bqzXWGy}y13lte)^={PRR&T=}LOk(2j;@pFai%U^+{qEg7N%=e% zE2hpr4=?YXK6&%*-R0HwboA-`Uso6Bm!C$rCl_2sB3Y{>&(7ao@U>dPJWC%zl&>E_ zwprbA0R)e75rcpeWr{DjI3_aTixtll zSmjc|H2Ib97{MK*h^1-O^80DU@XmPePrB=zmYI&%e5&ZGi)#k6Cvq+&w7g5mqY^gqLVwPIO(on*YA z`fok@o{LqQWPCXOyxU<+TRt(tZ=XW~2v>Q`Q>a%9CYj>mo~05zso)`G7^ 
z9&ssW<#2znNK&4u*<>U5gr_`Gv+p-6mdyrfIvf3-XS2`2X}(c^&_sU^pQVXZ0K$Bu z)*H2DclW?%E50IqJxBlgJ3?oexD<{tyciEK{BbnF1&5aa`}fhjzxXeJaLwanyF2wH z4Ypwp z^<#JUPk+wz_DZs3wOPU6B$QDuI3Q?y6-uVF2$WpGxnPmrjxfLHB4ulU`WhAUxnPIQ z3fC--lWbAK0qPSMKp3SPskqQX`yFFg3XTzXg0o+F7Q=@mi}MGdCn~NL`3B0(YQ@AO zlu9tg7mwh>=;CTHo=v8MPt)1%htc=~K*&-BS(UuPu0th9K@+_9{j;jaGGq@J{LU1YJW*sxdCBVDL zNF=)}g6Fr54nP^9uI)B8CCdx&X7 zFG#smYc8N%G7%S=r94aY>9~ioINn>5Y_*kut>CU2-py!$?js#f#eoO)elJO-`6h;I zy-8KHgm;_yoD1Nfa;R|O2>@b4-%=_Vv1qARdCyWNip!#AF9_^Trv2!Wr|a^EdoHB8 z8giLGe4oTI&rHajE(;#P)%fxTKzRGZ)o5A{ZzPAm#J}ol)&&?<$0=SW1CcM19zUJiVL}&;O)uhtxxA~oj&cO zK6-m{!4yA#J6MY(1voqb1QDw1%ka!4XV?LQ4`4W&OdAWDkGWL4GQlWThB|j2*E7!m z_0sBpx6uXHOfBoq7LA!j+5HQv5QHe1yonj@r)eo;~iRt(LfB zi#ZavoFlQtn89tw47JC-wAB)KVlmH`jpvMxK(%i>tAer$=X4tLZ1JBnH;M!IbD?rTa^iSRHj$H z8T^v9uE~U~ZaUP}P4~+8onNxnHJM)dYV}Ljx+W8~DiiIhOvq~8ggRR{c{XLLc{|+= znXuJ@0C%+@z*aXM?(C+cHf5@Lz26O)h}DyhboHcrIg_EvU;n->l$6Za~)M^T+ zT}|QGYI%vfT3#ZnpEse-e%@rY3ddco!m-u533s(_LRMu$U6qMg%}u1Mxd~a#O{lB6 z30Zw*fV%q1fLMJwPCEN?+^S5dt1@A$DI9k-g%hifzNGolH_9`WEZ{oLllXD>jm$!t z%A^HswuRM9T%$n^^RG8@#0&2GJ#8E3pf9#WVrjY?-A)aDs=N4e*%b}$dn?A(K5oa3 zwAFBIPeOOtsw25;iuadtJfar!EKeklw<{rZ2c^H1T*08P9DIubuS}07u#M^kH#qKqrZD({oNz$?;crydXFqex5+x{ zk)@q18ewIqlmN%0o`VRf0Kx+R6HFai)d8atL0y+R zzEdTEv4KFyb8*#COb!SdxHt&iD&n9fcCi-*eo#dmD1yR-@@1r0C`U4oy?Q3~n@$oGl!gk~wI8)6cKL3IjJ>giELT$fa*Af=p6fJyjN zD^jZIVu~EstIGY;Au3$rBlk&M{e0j^efreEhiPfSbG@)K`uXdfH?Ii*FZUrUynitFX~~Z-bcqb zQ4Xo+<(M;io{Q^HI5chf*mvuBQ4{5a4ZJ)96=1jCM<+BhnAAIh zk+8v(=XLatlZhHXND6gn5D6 zz^wxF8a631VLqY_+>Trb{iY3Q3KC+cA$p17DZv=~wBDNGg6sIs6G6tq8A85?+`}T^ z;7R`5cI@9nbRp@7P65)N+I^1ZlW56TZ2v@0`smHOcQW6Ih(G(AI1Z?1x>mn=H~jw7 z&4=E{n@R8LqW5WZ*&AF>d$&Ifu6x_x9Msd#O}*_eD(OuIA3k1>-r$~tiGvXC^?T#q zzb;1C-}d^wes46sICn5{5klW=pR&#`M%UjI&voZ848v~*!*l%2V0?~y@26+O@!9y} z+3n!$c5-$*8J*ouCOv#s{6W6?_s#zR&leqhQfUAHABzYC000000RIL6LPG)o8vp|U L0000000000P-s5F literal 0 HcmV?d00001 diff --git a/tests/strucvars/ingest/gcnv-min.vcf.gz.tbi b/tests/strucvars/ingest/gcnv-min.vcf.gz.tbi new file 
mode 100644 index 0000000000000000000000000000000000000000..48b1fdbe1f678cd42e01e80427fb0119e8df2520 GIT binary patch literal 126 zcmb2|=3rp}f&Xj_PR>jWi45F@pHfm%5)u-U5)v9N@&Li9fLWc5jRysMc5yg|*qljd zXo`|!1A_IQ0--M+_3*5Cz&~kLVAT}4rS31mR69Q}GXuAw(55gB1_pUF8>AVS!4`rD E0C_|r3IG5A literal 0 HcmV?d00001 diff --git a/tests/db/strucvars/ingest/manta-min.ped b/tests/strucvars/ingest/manta-min.ped similarity index 100% rename from tests/db/strucvars/ingest/manta-min.ped rename to tests/strucvars/ingest/manta-min.ped diff --git a/tests/db/strucvars/ingest/manta-min.vcf b/tests/strucvars/ingest/manta-min.vcf similarity index 100% rename from tests/db/strucvars/ingest/manta-min.vcf rename to tests/strucvars/ingest/manta-min.vcf diff --git a/tests/strucvars/ingest/manta-min.vcf.gz b/tests/strucvars/ingest/manta-min.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0991fd67f8c06889cbe233257d2e11651a65687 GIT binary patch literal 2488 zcmV;p2}kxHiwFb&00000{{{d;LjnMs39VUcZ`(K){p|b-Vgl^6fMb)Cs7E>;Kr27$ zUB_{3HM{dEU?ke+RHi&4oi@e(_yr|DXjv-KnQ2isk&ezi_uR`XGP%B9@{}#Jd_z_5 zF&w_w9dvbFTo^mNuX?|X`#sc7nOLcH58FN_(81`}a{xX8Eadx^YgGW&0f2lDVr*Nm0iN7(bdY6yJv)3G2X1Xn}g zqn);guz%?lKDtSe1#RL~?f&nnD+qQWx#+9kP8u!Ox+J)ZjtmZQ{L08z|pLRxQdxClq!qTrXrdgiUYBJ=v@8$X^(bPXY}>6zK|`12 zAiF+C+ci-(uGeLo5gmu1Iu!CvE*H6^UYBK}Y`;O5W1u`l>T|Sx6Gb4d*LB9_qXsU| zK>2P%9c|l0`MACbturp)YG_q#v?@ffp}%YsMJ!a0I^%LkeI21=pb&)hTsAaOzSW@1 zIY%|PiVPG+4Z3X0Kv|aEpv%nBBDg`9sorw@2EWWPLA;AM*dAr-B7TD|XjI5U*2Qq% zccIY}K7?Mw>M&8*@%(x&!&L+wyP-tbND^VsYM2z*Ghsf$wFzKkq=_uoMGaHa)JBNq z*INsH6Xv6aagI!F4nlT48-`~vuYM9>WSKAIhWN;C=w57OiV;GtRnKOmKy25(Fk~7&LFhV= zoQiCcbjLH_h37E-Ll>=rK@(I$vEd* z#WT_SX}sCyEas8{sq$T{b~#PK3(Yyz_kTKLT|7p=&5o3)q+F<+iwdc}0kY)y&(Bq1cM$=Y)N}+#jz17of}>JTvu$F#+a-A)q1w!6X7SObpUlCm! 
zWFQi-%_P^~Y4vK&4zfW~0H0Z+)=k=1LYgKvTXF8zvN3W;tLqUIUscmJ6zMtc0a| zoFIP2TxebEzOset6kIdjpSR2UbsCD@72 z`$u$M=fUUW(Z@d#J?E?S-x2NX1WDbPKDH87SZZk zTra|>zYr;Bn@q7vsP8P*e|?&U(fEG)^zbQ|yd~VYoV1Ha@cT~01wEHwOLL|DqXHC* zAH3tN11Q++QpIm*@;i;0h+hu_`hB~c%1h4D1Z?x{g(plF#99Yx5fhnu-LmS#!?!%Vpqp*Vz>@2M+6F(*g2m!zJydyI z{9sb8snXMRkC0$ZU-T@Hk|q0uxCF9bxlsS`mI55)gNGeWxw`ORb}ZxQJ1kTE6WEl0 zV@Vs#);UXVWK2_*fRuk`DPLz%16JfS+Gf6O- zgG^}Cz;mY7$35C%9?6>EcMouw)A-UK3gkwwt^>)tUl4?_c;U+TrB%zEmSZVf?#seO zu_DVwCrcCVg4qK+?6(+KYnC4s0roX!ERg_q?5enFHlsXWl+Agm?^q7WSE5)JLZz=D zrTOaMm)2M|dLd8iT@{-(U9&b2nR!prv2g^^qoQ~gY3F<)hz z>DG`SWm1;1Fswdaj=Et~9US{`@@w}(NcLi=*xB49Y*hKq>dgc zbMB!|CL-WnNe<|Cn{t-ux87b+5&MGsW44*IQub~Z)OMQ}Hp3~r|AraukP&3{G< z)YTAhbMs6$K&<4`rkqyYY^Sz6RXkhwRMvd2^Gr8a*Wt(c{atHzzi5s7t@&Ws3MNrY zcXX?)cT4YUT4kHHN|r0sLd5rMWVH&9`h&?W@}Z4y;o)y92!n7GhS4Yrf+!5bU=&6{ z7)7JeC>V{xsQ3)@H~PN569z#ThWq=)XB26OhK8XAYDl1|^h26elZKiy8U@AWFbGCL zaV?6Xf=mC2%8xLJ_CZIXju2@bt)S&PxTW_Tt-)0P4$Jq52S1c2ecg>8iZgoe208AB z;xO1lcgS)3ABwYX54Mr_p*%(P%C8oKhu&rOtxcF?e zy7drg6Q>PtVH@jz@aq4vB5U{89RL6yiwFb&00000{{{d;LjnLB00RI3000000001h C_`HGu literal 0 HcmV?d00001 diff --git a/tests/strucvars/ingest/manta-min.vcf.gz.tbi b/tests/strucvars/ingest/manta-min.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..8f3640b6da192672e930372ca912991284124b8d GIT binary patch literal 117 zcmb2|=3rp}f&Xj_PR>jW5e(dgpHfm%5)u-U5)v9N@&Li9fLWc5jRysM)(JJO(3q6i x&}2CWBpx?eVbz7Bl9BsJ zKFE$wQgT~hfr61}nmBsyt;`|$@-u^g*SFF0U7*^-7c^E~s)%gc?Bd{fqYrh>bx z>j!&8FD@^y7g0KogO7_@unv<1&3Kd5qEezPf_I^mun|)6x`B!{4bR{GesO6O&6wiB zKUe_-BcPaC2*wk~npgzF<;CS?Qy%J!2g`YsKF|?0wralNH7_zA>>6vFJEIM|xV#Lb z)R4TRAxaLM<)P{x(4ER@)q^8`s=Y}GEHwoe`GQqmKk zCjpE2zBRUb0@hK0_#URzws23tJ_!)w*mUgn1e{L5!#;7S+t=WB0%JmmgWYjYVB85f zKCwL4?K|mp0=8?pHm0;E;GYBtC6;aVc_6rxupGy-$9CTdLS9H<>^h_ml+creOgHIu7k?pdA;sN2vcjP}))95DVLmYq(mhu5ZleFiGY~f>ww9J+A|T(x-|Ci$d`& zug?>GU6&6c=M9LWEmM>81#tbg$T+YECZxER98?K8&qU5abk)%a#YY$dSd~yd@LIB} 
z62%s3&hiGBG6GWq0W9`a%`?$(xUV_;T9u+ukROcNZOxhDHK-jcfPbrarg#pyXcW^f z+;jEF`N^j#g+PDJ$~tF-f{Kay>gnBCn9^j{i&3I#QEdAm zYI#dpz2yoLp*YMAl@wVUtxZ|OJ!^P_Ky2Vpd@ahy0>J~52M&)yO1S5+x{ac9 ztaVFOI}WmZ1|SOatTn)|T&NwdVX;c%n-zljA2+Knmk69Mw`q*PYJb-8&GIIl$4EPo z&g11`6{a`wZzEW2V1JNGRI*jrwEi|GrBFvl!z)%Z)jIe7&p-e3{+yiTF8#cozacpK zg9g;7`jDwZ%_KaqTCnF)@j_uVU%jOuXJYd_urx1ik#`jk&#!?MIfPLPOiC_!Gu4p0 zpdaxK$+`LET>m}Vw_nGzEboQn$8l%M<`^$sK(GpzbCjfEoTB+^hSoPx7%$S##tZXW zM`XU5foOi@m61Ch`k~DG;vi)&8neaq_0jjy>%L#<4#c+HP;}x^rvTJkw#lYS14hsE zZZ#OewZ61k*74#deoKAU2d>+q+KI*tBY0#D6s2n6KJc37Z37Yw-KD+Kok zW#mMW$%F3jWdRJTc2?$4)uOB$)9iXx9{)eYC$Slono-^;v2Xj*X-et_-}yg+m}ljl z7upe3KCjB=Q`2VY#{j6-X;a^NEWBW?#3@;5&JNCd0Gktw^ zGW$_X%Z(1FDj{C$iaML^bc8_<^_)JTC-&Yb?X()>b8mA{P%LKQP&J*>QJut=Z=AsHdCW2b9=|Q3`~3PaMq zhCOG^p*}g?L!I1(Kf#>bg|A*%yPKNh6YBY%zTI5?J-%5E);GyuF&o75>p>W$gMZ$J z(V$J?fu?NGMsSdX%XKurAOnIi9*hR*;N$5Q{rg}v&{z6Fn#=+{$*1jAB*4?-zG+?~ z^R3;of-t(Bn*AuCV}IJNDnTd@pP7RQq8>(no9Y!Hz$3?+D9_K(*2fHGzRS>-*bYM( z8QThNsWr}~(K?{hcpZ>w90p`+CPlDevffc(M~pw<$qk3d@|(5vMaOl*%>s9oC^S4Z?lLx-On;YTFg--|7B zJb#Q_OuSQsv0=gSd>1Ll$0S3{@u|N>nd4iS=_iY9^^fN(R5-rx4qZ%qO$5Om6V}M0 zbgC;nzjWK@8l5pHfm%5)u-U5)v9N@&Li9fLWc5jRysMs<<3O?mSCx rX!3po5|5j#u(RiGxKBK^{l^A60%gP1x0yfL}(zIR>_D@IrU~Mz(?AF#ZleW`2r?{i}b?o8P)J|ma zkc8!kd1CIVN0XUq{8+xPBU5Qok>@ZlIKZz;*gNt2=K2i%g?N^2Qp0_@qG+G=x=PWG z)iz-s+cE0ePskmLk zMSuU?6QfX5_ORzvJYRIfS4|B|6(unaCEgl&mSI(ISPSd)GE8qXLhf8#zPzsA5(;ar zUuu~*Z7uwz9OKUL1|BVAn&czh^B6g+F7d<>Qhr~LC%r;F6J_t0RleA!ecg}A6G_p{ zf}ZNDASk-^77ir&QEi0X!jw>s?z+&KWSTvrZ@f2gAtg1e{KKie+f_O@ohSu_Qj zp}x7Pa}cMkb+5pFw5%{awfzOm7PPKu+hfea6Gh}JDN7QkC00YQ6^RxYV{-*OTjVwv z@Jf_(rr7bY3rw|Dgc0JXwQYlFeR~BCtWIVNjw{ET2*pJ`d;Jq){$MC=91@!)uxrwy zesp8f7$%~AgeP`^-s~;-^{24QiiYTF4s-o1?%C2APi(T^qGrJawjb^T0{q%Av4qMpH#J@z1Vj_dxJ4v!DI3Mg<`p@`P26uV!#Yet_tJUwdE>fYEM#Ehny-;?;3 zK9gy$OACqcH*k8zdroWkErg9tWlDb&VYwY{0>9F&eJ?Fx9?O`K}m@j4eEH*ijtI@~d=`T?1 zi}bidO=zIy=YVypJ-V+F;+}!66Z~QrDIjXmO^HT0;1Yi26fH9Q)nJfqQk;VO%-G?JP$bEckit|z@v0u>7x0)}hj>vlp?J)>mIWw!5e{p74*0$d% 
zicZ@q+Y-rCh8gpvZ~UmWL20KsrNk7~St0l9bOD3Jg5IP)BC&Y-MPu#BcN2N|C;#jp z?u$LA`OIUu_kz|rb$rRQr<&V+2>PHiz( zXlt+OP~AkHAlJWk8lILbA~$lezNr^ESoE=?50Y82KiO!0w7J%aJq`(Oy1YyXsuqah z&emA#(R*Kq_<-_tWq0uMVtwpk+{+_9w&kAK7h``bC6o8&$nLL>d|xoTBRCo0xH6=F zI)f60)&<_I@rF1i-9t)x!5=*=o0KX_S|}pXht!@idM=A5Z5a1D!3pmQwQMvD%pVBn z6uGSQq-Uy|+dzGjz=25I>1)yl|GL_ZEOjC`$7H^L^eTL%2wwiqS4XI_s0Tkbg>{l+ zF6GO2OnNA78H>-KW>-WTRhUiIbgj6KfHr|^B4?h%LT4Vcu;BDGcezhP4s#zi?!8w# z7ApEEG;1@NBJ*b1qPIWD2`uNf8C&7yz_ zcvbD@4BBu^KO!w~TKdW@Vy2hQJ?4Tjo6Z5jvxEnuYULlBKyaJyS{qGu72}L{yt=1u zHjtQgyNbgt&W&UHT3!5#`L2&tdat#F&w@kZ0xDV`sg<4e!ToMX3D0_!Hnu>230YZT zR#r6ZeVUN{Ek;d2a0+i3U?B=Y?tVaJ@JFe14xcNoMGd!CSdsvo^_n<&8m13QP-I zNIHXVbfINicafRi)VyVe5PmxkcT9v+;DthL8D^zI~-Lh?Icpxlx0$>cjLfcFgIdi@psx_XoKHsx^;R-LXtM5Qd$kN(Jk%HG^VjZgfJm zaOkf)mP3D#_rfMmHjn>VIFqVrXg@el7kU9 z7=DL@^$trdx&rO{kOA!8E7%!daiuIVKK+}NytH8q7u%^-=3cgXv>D3SiDii~>ffa0 zrH%Z9%*adon&DyxwaWaeP} z$VTR@FO<7ESY-2`9RZ?VtgF4rm=nSTZFi95?$=MNdJ56n4maoDq>r@U&`O%F47lT6nThdT3pY7Z z_K+ddiL!?#na-3w%*aHc;^9K3^C}+xWTI5@K#=L8ipRTa;ii{WK7R~GFmbF^_rYYj zncC^)DElIcst$DyO@p9iqq&kAjq%-s=YP=tvG0t`c#dLkfR2vcta#0O#Oi-7nc zO#KlMIKngl0SQEy1|c9X5T?NhNC?6#6k#5QFbhYRM<6U-A}k{j7OxPNQI~__g73G@ z)foKbnB#&?rbumS)A)g+>XW`&ch94^VUUU%L9NLYp-t@@exR5?$hZ7J@%2d<>uwy1 z+Y=&DEBJacMR!v>{=MiW92=2(f-8tIu09%Ltu}|6_kbkU6q`+gxieoU|HqdTnC z=27;rKgc-LJPa~eQ|v#vKm992A!p71Mmym$Fcz2m0j6y%Qz)A?#I5fNKuWzf&gkIR zn@@IvX}^^zH2;G;2Dr)+C5)fd^9SP0kvZ9RxzQWL{%tg&@w!3$5k& zSxvY{iM)&v2b&;WO5@M6C=zi0vPGu?w|oD`f(@3GWQF%hmChzfgg4rwIghl)B?Zg_ z)j_WPA05bM0okl}HZk=Lvj7!{<7+eqZq1#msimo^rAe-($*QF(sHG|SPqY}5MM@@Y zE{CM8S#ZfGM3LwXNSy4Xb~#d%F9rP;RRa`sb2TQfQI~FyrJkt<`A6 zWvI)#0v29sB}*ZdEqI$vB!rcU-+G|*3fAnQ!{mE0k;RN^SRaGi5Wp7U376h3QL zH^(^mDOR?*YM}tQ>NUExC+m7&`P+DG!tlMYg$9MkvTj$*zD=A<9iDc=!l9K5Rdr0V zW$tR5O!;CGOc)M7>da1Sr;k4N>K43CFMaGWX1q?xWHSF(8tJr4DS9u>ayJ%tjd>VQ z)pBcXnl#%y@%7=>bD@_BcN!ZTCz8?Vf$-E+LKg`%bNz94LFwhz%ZVK&%2eZ6aLTu^ z=m093lCqYna=;$?~=rel%Z-7W)PyPcbmP$Y@P!lZkIYlA}ob zikuvs3e~6%GRFwJ>>o)}m9~>Ss`y>d@OEPhd`2SjyK2Kz#^oKO9HaUJe=3^2yH#Mv 
zi#)$G<5;*h*Sr^(VO}SelzPqEyWu=lovk@|Wk}|?s}rcrYfY|`$FdNjgUq-2>%78} zqfa8!?ff;!M@1spet&zsOfigrZ!Wwmu}U82$AD?dZPhAU@=dx(q_=<^IrHM`2CHLG zDJ|o2uo9S`^8s1fC@j)0^bR_uVRR8);g5Y2PyVO)wC&{4j$LvQo9VvV?zoiY7Y1Pj zqnx3RPC=8rN-L25#nqp2@kGYu>I!W)UR6Ldnwne^uI(zx{Cg+F;tXASQw>fquhPS? zLhz?vlatgY^>AOT{HF{2`R;#Fo-%$YHcI(G-v}yA%sXDZ8ZO?Wree*d)UUau#s2dk znHIDxxN| zWNkrfD(wzlxxIA2CWF*92XWid=z3`&2@bP$zgK*^k>i9{YAp-7JACy7-S$A`nd;B`wP7M$q4xwz0!$@e9mcnt6+57S^J@5|wv zxwO$#>3`O+F7Ix-(2nx2v-a*mx-G%#U4*~%mzHovnw&3KO*WVyw7cZkoT3{ZPWaf+ zjxXdo4n1|t!EShs2{gOM{5s4opDAJ9#f#+}A0?usWaLnFf|F}C9Q7APZC5?0SDTDj zXHEGolu}o{dV8)a_+kAgHLzRzkJp=iXCYd})mEaW{eEir_IT&vU%(I$FUd9cBsqxm z0*@P$Iuw)*dz%}KAgtrcexxzS7GpCpho4~za&EMkVGNYS@rD(-gj-pzgFTRKe4b@v zV@vxXcHszhPzie|3XHMFxf(wJ)YA=6@ZWV9aFOPT&5e4$BYKrSA9lB<)-}u4$~yi* z&di$t>0Hcvt!64yWvCPjK}q{9_3~6u#y6Du>72_XYZgI`mDc51Aa3o!zjDFYWgGRO17wjbn!{^6M&Fi^N?mTjzY~gP)Mb)&eN2^YG?KO(4P2PyVKF zRD>PTds2a&jy3;+^%SEXsIE>hBcD2|IiFf@kcvMAkWX#&lvDKw4UE1(Z_wmR=IE74 zr93NvI=N$>E>&Zx;RuS1#mtgV8Ti|VN4LcU7_9lL4z@#d$7v}TK2uNt|K){~mEywR J51zl_KL9FAe}Mo1 literal 0 HcmV?d00001 diff --git a/tests/strucvars/ingest/popdel-min.vcf.gz.tbi b/tests/strucvars/ingest/popdel-min.vcf.gz.tbi new file mode 100644 index 0000000000000000000000000000000000000000..a3b2d945abba89484141746491f90dfa54d1273f GIT binary patch literal 114 zcmb2|=3rp}f&Xj_PR>jWp$y!GpHfm%5)u-U5)v9N@&Li9fLWc5jRysMs(hP1tXr4d x(B!Vf6A*hMVUvIhPwZUtNjEfXy@DsVF*4MdR4zWp#K0hrW~wv;GuRRk0RZ3sA6x(c literal 0 HcmV?d00001 From ddf06d093f2ab8c929279c9819fa418ff121a075 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Fri, 6 Oct 2023 15:22:24 +0200 Subject: [PATCH 05/13] clippy fix --- src/seqvars/ingest/mod.rs | 1 - src/strucvars/ingest/header.rs | 8 ++++---- src/strucvars/ingest/mod.rs | 5 ++--- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/seqvars/ingest/mod.rs b/src/seqvars/ingest/mod.rs index 1bbcd512..0ae9e9a9 100644 --- a/src/seqvars/ingest/mod.rs +++ b/src/seqvars/ingest/mod.rs @@ -467,7 +467,6 @@ pub fn 
run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow: #[cfg(test)] mod test { - use std::io::Read; use rstest::rstest; diff --git a/src/strucvars/ingest/header.rs b/src/strucvars/ingest/header.rs index 6c1b40bb..8770b2ed 100644 --- a/src/strucvars/ingest/header.rs +++ b/src/strucvars/ingest/header.rs @@ -272,10 +272,10 @@ mod test { let input_vcf_header = noodles_vcf::reader::Builder .build_from_path(path)? .read_header()?; - let sv_callers = vec![mehari::annotate::strucvars::guess_sv_caller(&path)?]; + let sv_callers = vec![mehari::annotate::strucvars::guess_sv_caller(path)?]; let sv_caller_refs = sv_callers.iter().collect::>(); let output_vcf_header = super::build_output_header( - &input_vcf_header.sample_names(), + input_vcf_header.sample_names(), &sv_caller_refs, &Some(pedigree), crate::common::GenomeRelease::Grch37, @@ -311,10 +311,10 @@ mod test { let input_vcf_header = noodles_vcf::reader::Builder .build_from_path(path)? .read_header()?; - let sv_callers = vec![mehari::annotate::strucvars::guess_sv_caller(&path)?]; + let sv_callers = vec![mehari::annotate::strucvars::guess_sv_caller(path)?]; let sv_caller_refs = sv_callers.iter().collect::>(); let output_vcf_header = super::build_output_header( - &input_vcf_header.sample_names(), + input_vcf_header.sample_names(), &sv_caller_refs, &Some(pedigree), crate::common::GenomeRelease::Grch38, diff --git a/src/strucvars/ingest/mod.rs b/src/strucvars/ingest/mod.rs index 0213ffd6..519885e8 100644 --- a/src/strucvars/ingest/mod.rs +++ b/src/strucvars/ingest/mod.rs @@ -3,9 +3,8 @@ use crate::common::{self, open_write_maybe_gz, worker_version, GenomeRelease}; use mehari::common::open_read_maybe_gz; -use mehari::annotate::{seqvars::provider::MehariProvider, strucvars::guess_sv_caller}; +use mehari::annotate::strucvars::guess_sv_caller; use noodles_vcf as vcf; -use thousands::Separable; pub mod header; @@ -58,7 +57,7 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow: let 
input_sv_callers = args .path_in .iter() - .map(|path_in| guess_sv_caller(path_in)) + .map(guess_sv_caller) .collect::, _>>()?; tracing::info!("processing header..."); From 75aec552f31af35966d733074f7837883b21149f Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Fri, 6 Oct 2023 15:49:25 +0200 Subject: [PATCH 06/13] update --- Cargo.lock | 66 ++++++++++++++++++++++++++++-------------------------- Cargo.toml | 3 +-- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6d8ac9d3..bf39b982 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -65,7 +65,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb" dependencies = [ "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -178,7 +178,7 @@ dependencies = [ "actix-router", "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -369,7 +369,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -435,7 +435,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -594,9 +594,9 @@ checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" @@ -716,7 +716,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -886,7 +886,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -897,7 +897,7 @@ checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" dependencies = [ 
"darling_core", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -1047,7 +1047,7 @@ checksum = "04d0b288e3bb1d861c4403c1774a6f7a798781dfc519b3647df2a3dd4ae95f25" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -1228,7 +1228,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -1671,9 +1671,9 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "librocksdb-sys" @@ -1795,7 +1795,9 @@ dependencies = [ [[package]] name = "mehari" -version = "0.10.0" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2cb639d81b95108663ee1ab919440b85d8e6b3aa06d3c4b1276ac52e74d6bbc" dependencies = [ "actix-web", "annonars", @@ -2191,7 +2193,7 @@ dependencies = [ "regex", "regex-syntax", "structmeta", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -2331,7 +2333,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" dependencies = [ "proc-macro2", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -2360,9 +2362,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.67" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" +checksum = "5b1106fec09662ec6dd98ccac0f81cef56984d0b49f75c92d8cbad76e20c005c" dependencies = [ "unicode-ident", ] @@ -2409,7 +2411,7 @@ dependencies = [ "prost", "prost-types", "regex", - "syn 2.0.37", + "syn 2.0.38", "tempfile", "which", ] @@ -2424,7 +2426,7 @@ 
dependencies = [ "itertools", "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -2679,7 +2681,7 @@ dependencies = [ "regex", "relative-path", "rustc_version 0.4.0", - "syn 2.0.37", + "syn 2.0.38", "unicode-ident", ] @@ -2835,7 +2837,7 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -2897,7 +2899,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -3044,7 +3046,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -3055,7 +3057,7 @@ checksum = "a60bcaff7397072dca0017d1db428e30d5002e00b6847703e2e42005c95fbe00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -3077,7 +3079,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -3099,9 +3101,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.37" +version = "2.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" +checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b" dependencies = [ "proc-macro2", "quote", @@ -3163,7 +3165,7 @@ checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -3303,7 +3305,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", ] [[package]] @@ -3521,7 +3523,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.37", + "syn 2.0.38", "wasm-bindgen-shared", ] @@ -3543,7 +3545,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.37", + "syn 2.0.38", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/Cargo.toml b/Cargo.toml index e3a595be..c0a50dac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,8 +29,7 @@ hgvs = "0.11" indexmap = { version = "2.0", features = ["serde"] } itertools = "0.11" log = "0.4" -#mehari = "0.11" -mehari = { path = "../mehari" } +mehari = "0.11" multimap = "0.9" procfs = "0.15" prost = "0.12" From 561a1c63feae97c13e3cd9e0ec15ef6114fabb82 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 9 Oct 2023 09:19:39 +0200 Subject: [PATCH 07/13] wip --- Cargo.lock | 4 +- Cargo.toml | 6 +- src/common.rs | 10 ++ src/strucvars/ingest/header.rs | 23 ++- src/strucvars/ingest/mod.rs | 135 ++++++++++++++++-- ...d_output_header_37@delly2-min.vcf.snap.new | 61 ++++++++ ...tput_header_37@dragen-cnv-min.vcf.snap.new | 57 ++++++++ ...utput_header_37@dragen-sv-min.vcf.snap.new | 57 ++++++++ ...ild_output_header_37@gcnv-min.vcf.snap.new | 57 ++++++++ ...ld_output_header_37@manta-min.vcf.snap.new | 57 ++++++++ ...ild_output_header_37@melt-min.vcf.snap.new | 57 ++++++++ ...d_output_header_37@popdel-min.vcf.snap.new | 61 ++++++++ ...d_output_header_38@delly2-min.vcf.snap.new | 61 ++++++++ ...tput_header_38@dragen-cnv-min.vcf.snap.new | 57 ++++++++ ...utput_header_38@dragen-sv-min.vcf.snap.new | 57 ++++++++ ...ild_output_header_38@gcnv-min.vcf.snap.new | 57 ++++++++ ...ld_output_header_38@manta-min.vcf.snap.new | 57 ++++++++ ...ild_output_header_38@melt-min.vcf.snap.new | 57 ++++++++ ...d_output_header_38@popdel-min.vcf.snap.new | 61 ++++++++ ...ngest__test__smoke_test_singleton.snap.new | 61 ++++++++ ...rs__ingest__test__smoke_test_trio.snap.new | 62 ++++++++ 21 files changed, 1097 insertions(+), 18 deletions(-) create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap.new create mode 100644 
src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap.new create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap.new create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap.new create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap.new create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap.new create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap.new create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap.new create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap.new create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap.new create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap.new create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap.new create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap.new create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap.new create mode 100644 
src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap.new create mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap.new diff --git a/Cargo.lock b/Cargo.lock index bf39b982..69a2c408 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1796,8 +1796,6 @@ dependencies = [ [[package]] name = "mehari" version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2cb639d81b95108663ee1ab919440b85d8e6b3aa06d3c4b1276ac52e74d6bbc" dependencies = [ "actix-web", "annonars", @@ -3455,6 +3453,8 @@ dependencies = [ "procfs", "prost", "prost-build", + "rand 0.8.5", + "rand_core 0.6.4", "regex", "rocksdb", "rstest", diff --git a/Cargo.toml b/Cargo.toml index c0a50dac..476110b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,8 @@ hgvs = "0.11" indexmap = { version = "2.0", features = ["serde"] } itertools = "0.11" log = "0.4" -mehari = "0.11" +# mehari = "0.11" +mehari = { path = "../mehari" } multimap = "0.9" procfs = "0.15" prost = "0.12" @@ -50,7 +51,8 @@ uuid = { version = "1.4", features = ["v4", "fast-rng", "serde"] } noodles-vcf = "0.40.0" rocksdb = { version = "0.21.0", features = ["multi-threaded-cf"] } noodles-bgzf = "0.24.0" - +rand = "0.8" +rand_core = "0.6" [build-dependencies] prost-build = "0.12" diff --git a/src/common.rs b/src/common.rs index da42a195..87932a95 100644 --- a/src/common.rs +++ b/src/common.rs @@ -285,6 +285,11 @@ pub fn add_contigs_37( ); } + builder = builder.insert( + "x-genome-build".parse()?, + vcf::header::record::Value::from("GRCh37"), + )?; + Ok(builder) } @@ -348,6 +353,11 @@ pub fn add_contigs_38( ); } + builder = builder.insert( + "x-genome-build".parse()?, + vcf::header::record::Value::from("GRCh38"), + )?; + Ok(builder) } diff --git a/src/strucvars/ingest/header.rs b/src/strucvars/ingest/header.rs index 8770b2ed..de76cac6 100644 --- a/src/strucvars/ingest/header.rs +++ 
b/src/strucvars/ingest/header.rs @@ -35,8 +35,9 @@ fn caller_version(sv_caller: &mehari::annotate::strucvars::SvCaller) -> String { pub fn build_output_header( input_sample_names: &SampleNames, input_sv_callers: &[&mehari::annotate::strucvars::SvCaller], - pedigree: &Option, + pedigree: Option<&mehari::ped::PedigreeByName>, genomebuild: GenomeRelease, + file_date: &str, worker_version: &str, ) -> Result { use vcf::header::record::value::{ @@ -47,6 +48,10 @@ pub fn build_output_header( use vcf::record::genotypes::keys::key; let builder = vcf::Header::builder() + .insert( + "fileDate".parse()?, + vcf::header::record::Value::from(file_date), + )? .add_filter("PASS", Map::::new("All filters passed")) .add_info( vcf::record::info::field::key::IS_IMPRECISE, @@ -272,13 +277,17 @@ mod test { let input_vcf_header = noodles_vcf::reader::Builder .build_from_path(path)? .read_header()?; - let sv_callers = vec![mehari::annotate::strucvars::guess_sv_caller(path)?]; + let sv_callers = { + let reader = mehari::common::open_read_maybe_gz(path)?; + vec![mehari::annotate::strucvars::guess_sv_caller(reader)?] + }; let sv_caller_refs = sv_callers.iter().collect::>(); let output_vcf_header = super::build_output_header( input_vcf_header.sample_names(), &sv_caller_refs, - &Some(pedigree), + Some(&pedigree), crate::common::GenomeRelease::Grch37, + "20230421", "x.y.z", )?; @@ -311,13 +320,17 @@ mod test { let input_vcf_header = noodles_vcf::reader::Builder .build_from_path(path)? .read_header()?; - let sv_callers = vec![mehari::annotate::strucvars::guess_sv_caller(path)?]; + let sv_callers = { + let reader = mehari::common::open_read_maybe_gz(path)?; + vec![mehari::annotate::strucvars::guess_sv_caller(reader)?] 
+ }; let sv_caller_refs = sv_callers.iter().collect::>(); let output_vcf_header = super::build_output_header( input_vcf_header.sample_names(), &sv_caller_refs, - &Some(pedigree), + Some(&pedigree), crate::common::GenomeRelease::Grch38, + "20230421", "x.y.z", )?; diff --git a/src/strucvars/ingest/mod.rs b/src/strucvars/ingest/mod.rs index 519885e8..c6006961 100644 --- a/src/strucvars/ingest/mod.rs +++ b/src/strucvars/ingest/mod.rs @@ -1,7 +1,10 @@ //! Implementation of `strucvars ingest` subcommand. +use std::io::{BufRead, Write}; + use crate::common::{self, open_write_maybe_gz, worker_version, GenomeRelease}; use mehari::common::open_read_maybe_gz; +use rand_core::SeedableRng; use mehari::annotate::strucvars::guess_sv_caller; use noodles_vcf as vcf; @@ -24,9 +27,93 @@ pub struct Args { /// Path to input files. #[clap(long, required = true)] pub path_in: Vec, + /// Path to coverage VCF files from maelstrom; optional. + #[clap(long)] + pub path_cov_vcf: Vec, /// Path to output file. #[clap(long)] pub path_out: String, + + /// Minimal reciprocal overlap to require. + #[arg(long, default_value_t = 0.8)] + pub min_overlap: f32, + /// Slack to use around break-ends. + #[arg(long, default_value_t = 50)] + pub slack_bnd: i32, + /// Slack to use around insertions. + #[arg(long, default_value_t = 50)] + pub slack_ins: i32, + + /// Seed for random number generator (UUIDs), if any. + #[arg(long)] + pub rng_seed: Option, + /// Value to write to `##fileDate`. + #[arg(long)] + pub file_date: String, +} + +/// Write out variants from input files. +fn process_variants( + pedigree: &mehari::ped::PedigreeByName, + output_writer: &mut vcf::Writer>, + input_readers: &mut [vcf::Reader>], + output_header: &vcf::Header, + input_header: &[vcf::Header], + input_sv_callers: &[mehari::annotate::strucvars::SvCaller], + args: &Args, +) -> Result<(), anyhow::Error> { + // Initialize the random number generator from command line seed if given or local entropy + // source. 
+ let mut rng = if let Some(rng_seed) = args.rng_seed { + rand::rngs::StdRng::seed_from_u64(rng_seed) + } else { + rand::rngs::StdRng::from_entropy() + }; + + // Create temporary directory. We will create one temporary file (containing `jsonl` + // serialized `VarFishStrucvarTsvRecord`s) for each SV type and contig. + let tmp_dir = tempdir::TempDir::new("mehari")?; + + // Read through input VCF files and write out to temporary files. + tracing::info!("converting input VCF files to temporary files..."); + for (mut reader, sv_caller, header) in itertools::izip!( + input_readers.iter_mut(), + input_sv_callers.iter(), + input_header.iter() + ) { + mehari::annotate::strucvars::run_vcf_to_jsonl( + pedigree, + &mut reader, + &header, + &sv_caller, + &tmp_dir, + &mut std::collections::HashMap::new(), + &mut rng, + )?; + } + tracing::info!("... done converting input files"); + + tracing::info!("clustering SVs to output..."); + // Read through temporary files by contig, cluster by overlap as configured, and write to `output_writer`. + for contig_no in 1..=25 { + tracing::info!( + " contig: {}", + annonars::common::cli::CANONICAL[contig_no - 1] + ); + let clusters = mehari::annotate::strucvars::read_and_cluster_for_contig( + &tmp_dir, + contig_no, + args.slack_ins, + args.slack_bnd, + args.min_overlap, + )?; + for record in clusters { + output_writer.write_record(&output_header, &record.try_into()?)?; + } + } + tracing::info!("... done clustering SVs to output"); + + Ok(()) } /// Main entry point for `strucvars ingest` sub command. 
@@ -57,7 +144,10 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow: let input_sv_callers = args .path_in .iter() - .map(guess_sv_caller) + .map(|path| { + let reader = open_read_maybe_gz(path)?; + guess_sv_caller(reader) + }) .collect::, _>>()?; tracing::info!("processing header..."); @@ -85,8 +175,9 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow: let output_header = header::build_output_header( sample_names, &input_sv_callers.iter().collect::>(), - &Some(pedigree), + Some(&pedigree), args.genomebuild, + &args.file_date, worker_version(), ) .map_err(|e| anyhow::anyhow!("problem building output header: {}", e))?; @@ -96,13 +187,15 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow: .write_header(&output_header) .map_err(|e| anyhow::anyhow!("problem writing header: {}", e))?; - // process_variants( - // &mut output_writer, - // &mut input_readers, - // &output_header, - // &input_header, - // args, - // )?; + process_variants( + &pedigree, + &mut output_writer, + &mut input_readers, + &output_header, + &input_headers, + &input_sv_callers, + args, + )?; tracing::info!( "All of `strucvars ingest` completed in {:?}", @@ -126,6 +219,7 @@ mod test { String::from("tests/strucvars/ingest/delly2-min.vcf"), String::from("tests/strucvars/ingest/popdel-min.vcf"), ], + path_cov_vcf: vec![], path_ped: "tests/strucvars/ingest/delly2-min.ped".into(), genomebuild: GenomeRelease::Grch37, path_out: tmpdir @@ -133,6 +227,11 @@ mod test { .to_str() .expect("invalid path") .into(), + min_overlap: 0.8, + slack_bnd: 50, + slack_ins: 50, + rng_seed: Some(42), + file_date: String::from("20230421"), }; super::run(&args_common, &args)?; @@ -154,6 +253,7 @@ mod test { String::from("tests/strucvars/ingest/manta-min.vcf"), String::from("tests/strucvars/ingest/melt-min.vcf"), ], + path_cov_vcf: vec![], path_ped: "tests/strucvars/ingest/dragen-cnv-min.ped".into(), genomebuild: GenomeRelease::Grch37, 
path_out: tmpdir @@ -161,6 +261,11 @@ mod test { .to_str() .expect("invalid path") .into(), + min_overlap: 0.8, + slack_bnd: 50, + slack_ins: 50, + rng_seed: Some(42), + file_date: String::from("20230421"), }; super::run(&args_common, &args)?; @@ -180,6 +285,7 @@ mod test { String::from("tests/strucvars/ingest/delly2-min.vcf.gz"), String::from("tests/strucvars/ingest/popdel-min.vcf.gz"), ], + path_cov_vcf: vec![], path_ped: "tests/strucvars/ingest/delly2-min.ped".into(), genomebuild: GenomeRelease::Grch37, path_out: tmpdir @@ -187,6 +293,11 @@ mod test { .to_str() .expect("invalid path") .into(), + min_overlap: 0.8, + slack_bnd: 50, + slack_ins: 50, + rng_seed: Some(42), + file_date: String::from("20230421"), }; super::run(&args_common, &args)?; @@ -208,6 +319,7 @@ mod test { String::from("tests/strucvars/ingest/manta-min.vcf.gz"), String::from("tests/strucvars/ingest/melt-min.vcf.gz"), ], + path_cov_vcf: vec![], path_ped: "tests/strucvars/ingest/dragen-cnv-min.ped".into(), genomebuild: GenomeRelease::Grch37, path_out: tmpdir @@ -215,6 +327,11 @@ mod test { .to_str() .expect("invalid path") .into(), + min_overlap: 0.8, + slack_bnd: 50, + slack_ins: 50, + rng_seed: Some(42), + file_date: String::from("20230421"), }; super::run(&args_common, &args)?; diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap.new new file mode 100644 index 00000000..7a0e0285 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap.new @@ -0,0 +1,61 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 298 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 +##SAMPLE= +##SAMPLE= +##SAMPLE= +##PEDIGREE= +##PEDIGREE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap.new new file mode 100644 index 00000000..9d6afc3e --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap.new @@ -0,0 +1,57 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 298 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap.new new file mode 100644 index 00000000..b6fe58c9 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap.new @@ -0,0 +1,57 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 298 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap.new new file mode 100644 index 00000000..fcfd0768 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap.new @@ -0,0 +1,57 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 298 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap.new new file mode 100644 index 00000000..8777d619 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap.new @@ -0,0 +1,57 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 298 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap.new new file mode 100644 index 00000000..f4f5abfe --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap.new @@ -0,0 +1,57 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 298 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap.new new file mode 100644 index 00000000..d3b52a18 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap.new @@ -0,0 +1,61 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 298 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 +##SAMPLE= +##SAMPLE= +##SAMPLE= +##PEDIGREE= +##PEDIGREE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap.new new file mode 100644 index 00000000..ddd3d292 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap.new @@ -0,0 +1,61 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 338 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 +##SAMPLE= +##SAMPLE= +##SAMPLE= +##PEDIGREE= +##PEDIGREE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap.new new file mode 100644 index 00000000..23fdead5 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap.new @@ -0,0 +1,57 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 338 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap.new new file mode 100644 index 00000000..bfca1a65 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap.new @@ -0,0 +1,57 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 338 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap.new new file mode 100644 index 00000000..674093e8 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap.new @@ -0,0 +1,57 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 338 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap.new new file mode 100644 index 00000000..b39aa237 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap.new @@ -0,0 +1,57 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 338 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap.new new file mode 100644 index 00000000..1523024c --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap.new @@ -0,0 +1,57 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 338 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap.new new file mode 100644 index 00000000..c26832d9 --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap.new @@ -0,0 +1,61 @@ +--- +source: src/strucvars/ingest/header.rs +assertion_line: 338 +expression: "std::fs::read_to_string(out_path_str)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 +##SAMPLE= +##SAMPLE= +##SAMPLE= +##PEDIGREE= +##PEDIGREE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap.new new file mode 100644 index 00000000..45f5f7ef --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap.new @@ -0,0 +1,61 @@ +--- +source: src/strucvars/ingest/mod.rs +assertion_line: 213 +expression: "std::fs::read_to_string(&args.path_out)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 +##SAMPLE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +##x-varfish-version= +##x-varfish-version= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE + diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap.new new file mode 100644 index 00000000..08d1786c --- /dev/null +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap.new @@ -0,0 +1,62 @@ +--- +source: src/strucvars/ingest/mod.rs +assertion_line: 179 +expression: "std::fs::read_to_string(&args.path_out)?" 
+--- +##fileformat=VCFv4.4 +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 +##SAMPLE= +##SAMPLE= +##SAMPLE= +##PEDIGREE= +##PEDIGREE= +##PEDIGREE= +##x-varfish-version= +##x-varfish-version= +##x-varfish-version= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother + From e7dfbe542e7739d5ebd62e17ca8780ae23ca35c9 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 9 Oct 2023 09:20:23 +0200 Subject: [PATCH 08/13] wip --- src/strucvars/ingest/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/strucvars/ingest/mod.rs b/src/strucvars/ingest/mod.rs index c6006961..ba672657 100644 --- a/src/strucvars/ingest/mod.rs +++ b/src/strucvars/ingest/mod.rs @@ -76,16 +76,16 @@ fn process_variants( // Read through input VCF files and write out to temporary files. tracing::info!("converting input VCF files to temporary files..."); - for (mut reader, sv_caller, header) in itertools::izip!( + for (reader, sv_caller, header) in itertools::izip!( input_readers.iter_mut(), input_sv_callers.iter(), input_header.iter() ) { mehari::annotate::strucvars::run_vcf_to_jsonl( pedigree, - &mut reader, - &header, - &sv_caller, + reader, + header, + sv_caller, &tmp_dir, &mut std::collections::HashMap::new(), &mut rng, @@ -108,7 +108,7 @@ fn process_variants( args.min_overlap, )?; for record in clusters { - output_writer.write_record(&output_header, &record.try_into()?)?; + output_writer.write_record(output_header, &record.try_into()?)?; } } tracing::info!("... 
done clustering SVs to output"); From 42f994bd2bad35049e40b3a6753a611958aaf9c4 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 9 Oct 2023 09:57:51 +0200 Subject: [PATCH 09/13] wip --- Cargo.lock | 60 ++++++++++++++++-- Cargo.toml | 1 + src/seqvars/ingest/mod.rs | 2 +- src/strucvars/ingest/header.rs | 8 ++- src/strucvars/ingest/mod.rs | 49 +++++++++++++-- ...build_output_header_37@delly2-min.vcf.snap | 6 ++ ...d_output_header_37@delly2-min.vcf.snap.new | 61 ------------------- ...d_output_header_37@dragen-cnv-min.vcf.snap | 6 ++ ...tput_header_37@dragen-cnv-min.vcf.snap.new | 57 ----------------- ...ld_output_header_37@dragen-sv-min.vcf.snap | 6 ++ ...utput_header_37@dragen-sv-min.vcf.snap.new | 57 ----------------- ...__build_output_header_37@gcnv-min.vcf.snap | 6 ++ ...ild_output_header_37@gcnv-min.vcf.snap.new | 57 ----------------- ..._build_output_header_37@manta-min.vcf.snap | 6 ++ ...ld_output_header_37@manta-min.vcf.snap.new | 57 ----------------- ...__build_output_header_37@melt-min.vcf.snap | 6 ++ ...ild_output_header_37@melt-min.vcf.snap.new | 57 ----------------- ...build_output_header_37@popdel-min.vcf.snap | 6 ++ ...d_output_header_37@popdel-min.vcf.snap.new | 61 ------------------- ...build_output_header_38@delly2-min.vcf.snap | 6 ++ ...d_output_header_38@delly2-min.vcf.snap.new | 61 ------------------- ...d_output_header_38@dragen-cnv-min.vcf.snap | 6 ++ ...tput_header_38@dragen-cnv-min.vcf.snap.new | 57 ----------------- ...ld_output_header_38@dragen-sv-min.vcf.snap | 6 ++ ...utput_header_38@dragen-sv-min.vcf.snap.new | 57 ----------------- ...__build_output_header_38@gcnv-min.vcf.snap | 6 ++ ...ild_output_header_38@gcnv-min.vcf.snap.new | 57 ----------------- ..._build_output_header_38@manta-min.vcf.snap | 6 ++ ...ld_output_header_38@manta-min.vcf.snap.new | 57 ----------------- ...__build_output_header_38@melt-min.vcf.snap | 6 ++ ...ild_output_header_38@melt-min.vcf.snap.new | 57 ----------------- 
...build_output_header_38@popdel-min.vcf.snap | 6 ++ ...d_output_header_38@popdel-min.vcf.snap.new | 61 ------------------- ...__ingest__test__smoke_test_singleton.snap} | 11 +++- ...cvars__ingest__test__smoke_test_trio.snap} | 7 ++- 35 files changed, 208 insertions(+), 828 deletions(-) delete mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap.new delete mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap.new delete mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap.new delete mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap.new delete mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap.new delete mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap.new delete mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap.new delete mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap.new delete mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap.new delete mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap.new delete mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap.new delete mode 100644 
src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap.new delete mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap.new delete mode 100644 src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap.new rename src/strucvars/ingest/snapshots/{varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap.new => varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap} (69%) rename src/strucvars/ingest/snapshots/{varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap.new => varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap} (83%) diff --git a/Cargo.lock b/Cargo.lock index 69a2c408..08b0a313 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1773,6 +1773,15 @@ dependencies = [ "libc", ] +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "matrixmultiply" version = "0.3.8" @@ -2189,7 +2198,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "regex-syntax", + "regex-syntax 0.7.5", "structmeta", "syn 2.0.38", ] @@ -2597,8 +2606,17 @@ checksum = "ebee201405406dbf528b8b672104ae6d6d63e6d118cb10e4d51abbc7b58044ff" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.3.9", + "regex-syntax 0.7.5", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", ] [[package]] @@ -2609,9 +2627,15 @@ checksum = 
"59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.7.5", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.7.5" @@ -3333,14 +3357,41 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77" dependencies = [ + "matchers", "nu-ansi-term", + "once_cell", + "regex", "sharded-slab", "smallvec", "thread_local", + "tracing", "tracing-core", "tracing-log", ] +[[package]] +name = "tracing-test" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a2c0ff408fe918a94c428a3f2ad04e4afd5c95bbc08fcf868eff750c15728a4" +dependencies = [ + "lazy_static", + "tracing-core", + "tracing-subscriber", + "tracing-test-macro", +] + +[[package]] +name = "tracing-test-macro" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "258bc1c4f8e2e73a977812ab339d503e6feeb92700f6d07a6de4d321522d5c08" +dependencies = [ + "lazy_static", + "quote", + "syn 1.0.109", +] + [[package]] name = "triple_accel" version = "0.4.0" @@ -3472,6 +3523,7 @@ dependencies = [ "thousands", "tracing", "tracing-subscriber", + "tracing-test", "uuid", ] diff --git a/Cargo.toml b/Cargo.toml index 476110b1..3ac0d36b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,7 @@ pretty_assertions = "1.4" rstest = "0.18.2" serde_test = "1.0" temp_testdir = "0.2" +tracing-test = "0.2.4" [[bin]] name = "varfish-server-worker" diff --git a/src/seqvars/ingest/mod.rs b/src/seqvars/ingest/mod.rs index 0ae9e9a9..0ad4e0b7 100644 --- a/src/seqvars/ingest/mod.rs +++ b/src/seqvars/ingest/mod.rs @@ -529,7 +529,7 @@ mod test { }; super::run(&args_common, 
&args)?; - let mut buffer = Vec::new(); + let mut buffer: Vec = Vec::new(); hxdmp::hexdump(&crate::common::read_to_bytes(&args.path_out)?, &mut buffer)?; insta::assert_snapshot!(String::from_utf8_lossy(&buffer)); diff --git a/src/strucvars/ingest/header.rs b/src/strucvars/ingest/header.rs index de76cac6..026f7b39 100644 --- a/src/strucvars/ingest/header.rs +++ b/src/strucvars/ingest/header.rs @@ -1,7 +1,7 @@ use std::collections::HashSet; use noodles_vcf as vcf; -use vcf::header::SampleNames; +use vcf::header::{record::value::map::AlternativeAllele, SampleNames}; use crate::common::{add_contigs_37, add_contigs_38, GenomeRelease}; @@ -157,7 +157,11 @@ pub fn build_output_header( format::Type::Integer, "Point count (windows/targets/probes)", ), - ); + ) + .add_alternative_allele("DEL".parse()?, Map::::new("Deletion")) + .add_alternative_allele("DUP".parse()?, Map::::new("Duplication")) + .add_alternative_allele("INS".parse()?, Map::::new("Insertion")) + .add_alternative_allele("INV".parse()?, Map::::new("Inversion")); let mut builder = match genomebuild { GenomeRelease::Grch37 => add_contigs_37(builder), diff --git a/src/strucvars/ingest/mod.rs b/src/strucvars/ingest/mod.rs index ba672657..36f2949f 100644 --- a/src/strucvars/ingest/mod.rs +++ b/src/strucvars/ingest/mod.rs @@ -3,7 +3,7 @@ use std::io::{BufRead, Write}; use crate::common::{self, open_write_maybe_gz, worker_version, GenomeRelease}; -use mehari::common::open_read_maybe_gz; +use mehari::{annotate::seqvars::AnnotatedVcfWriter, common::open_read_maybe_gz}; use rand_core::SeedableRng; use mehari::annotate::strucvars::guess_sv_caller; @@ -52,10 +52,40 @@ pub struct Args { pub file_date: String, } +/// Wrapper around noodle's VCF writer that adjusts the record for the worker. 
+pub struct WriterWrapper { + inner: vcf::Writer>, +} + +impl WriterWrapper { + pub fn new(inner: vcf::Writer>) -> Self { + Self { inner } + } +} + +impl mehari::annotate::seqvars::AnnotatedVcfWriter for WriterWrapper { + fn write_header(&mut self, header: &vcf::Header) -> Result<(), anyhow::Error> { + self.inner + .write_header(header) + .map_err(|e| anyhow::anyhow!("Error writing VCF header: {}", e)) + } + + fn write_record( + &mut self, + header: &vcf::Header, + record: &vcf::Record, + ) -> Result<(), anyhow::Error> { + eprintln!("foo"); + self.inner + .write_record(header, record) + .map_err(|e| anyhow::anyhow!("Error writing VCF record: {}", e)) + } +} + /// Write out variants from input files. fn process_variants( pedigree: &mehari::ped::PedigreeByName, - output_writer: &mut vcf::Writer>, + output_writer: &mut dyn mehari::annotate::seqvars::AnnotatedVcfWriter, input_readers: &mut [vcf::Reader>], output_header: &vcf::Header, input_header: &[vcf::Header], @@ -182,7 +212,9 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow: ) .map_err(|e| anyhow::anyhow!("problem building output header: {}", e))?; - let mut output_writer = { vcf::writer::Writer::new(open_write_maybe_gz(&args.path_out)?) 
}; + let mut output_writer = WriterWrapper::new(vcf::writer::Writer::new(open_write_maybe_gz( + &args.path_out, + )?)); output_writer .write_header(&output_header) .map_err(|e| anyhow::anyhow!("problem writing header: {}", e))?; @@ -208,6 +240,7 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow: mod test { use crate::common::GenomeRelease; + #[tracing_test::traced_test] #[test] fn smoke_test_trio() -> Result<(), anyhow::Error> { let tmpdir = temp_testdir::TempDir::default(); @@ -239,6 +272,7 @@ mod test { Ok(()) } + #[tracing_test::traced_test] #[test] fn smoke_test_singleton() -> Result<(), anyhow::Error> { let tmpdir = temp_testdir::TempDir::default(); @@ -274,6 +308,7 @@ mod test { Ok(()) } + #[tracing_test::traced_test] #[test] fn smoke_test_trio_gz() -> Result<(), anyhow::Error> { let tmpdir = temp_testdir::TempDir::default(); @@ -301,10 +336,13 @@ mod test { }; super::run(&args_common, &args)?; - insta::assert_snapshot!(std::fs::read_to_string(&args.path_out)?); + let mut buffer: Vec = Vec::new(); + hxdmp::hexdump(&crate::common::read_to_bytes(&args.path_out)?, &mut buffer)?; Ok(()) } + + #[tracing_test::traced_test] #[test] fn smoke_test_singleton_gz() -> Result<(), anyhow::Error> { let tmpdir = temp_testdir::TempDir::default(); @@ -335,7 +373,8 @@ mod test { }; super::run(&args_common, &args)?; - insta::assert_snapshot!(std::fs::read_to_string(&args.path_out)?); + let mut buffer: Vec = Vec::new(); + hxdmp::hexdump(&crate::common::read_to_bytes(&args.path_out)?, &mut buffer)?; Ok(()) } diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap index 5f3a360a..ae990259 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap +++ 
b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap @@ -21,6 +21,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 ##SAMPLE= ##SAMPLE= ##SAMPLE= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap.new deleted file mode 100644 index 7a0e0285..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap.new +++ /dev/null @@ -1,61 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 298 -expression: "std::fs::read_to_string(out_path_str)?" 
---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh37 -##SAMPLE= -##SAMPLE= -##SAMPLE= -##PEDIGREE= -##PEDIGREE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap index 18e81a17..c5bd3447 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap @@ -21,6 +21,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 ##SAMPLE= ##PEDIGREE= ##x-varfish-version= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap.new deleted file mode 100644 index 9d6afc3e..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap.new +++ /dev/null @@ -1,57 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 298 -expression: "std::fs::read_to_string(out_path_str)?" ---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh37 -##SAMPLE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap index eb90e9d3..476e6802 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap +++ 
b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap @@ -21,6 +21,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 ##SAMPLE= ##PEDIGREE= ##x-varfish-version= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap.new deleted file mode 100644 index b6fe58c9..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap.new +++ /dev/null @@ -1,57 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 298 -expression: "std::fs::read_to_string(out_path_str)?" 
---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh37 -##SAMPLE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap index 17b62134..39eb3fc8 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap @@ -21,6 +21,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 ##SAMPLE= ##PEDIGREE= ##x-varfish-version= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap.new deleted file mode 100644 index fcfd0768..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap.new +++ /dev/null @@ -1,57 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 298 -expression: "std::fs::read_to_string(out_path_str)?" ---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh37 -##SAMPLE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap index 7d6dc923..26f45c68 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap @@ -21,6 +21,10 @@ expression: 
"std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 ##SAMPLE= ##PEDIGREE= ##x-varfish-version= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap.new deleted file mode 100644 index 8777d619..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap.new +++ /dev/null @@ -1,57 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 298 -expression: "std::fs::read_to_string(out_path_str)?" ---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh37 -##SAMPLE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap index b0e25e2b..f484e76e 100644 --- 
a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap @@ -21,6 +21,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 ##SAMPLE= ##PEDIGREE= ##x-varfish-version= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap.new deleted file mode 100644 index f4f5abfe..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap.new +++ /dev/null @@ -1,57 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 298 -expression: "std::fs::read_to_string(out_path_str)?" 
---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh37 -##SAMPLE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap index d54eeff0..5fb65925 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap @@ -21,6 +21,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh37 ##SAMPLE= ##SAMPLE= ##SAMPLE= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap.new deleted file mode 100644 index d3b52a18..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap.new +++ /dev/null @@ -1,61 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 298 -expression: "std::fs::read_to_string(out_path_str)?" ---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh37 -##SAMPLE= -##SAMPLE= -##SAMPLE= -##PEDIGREE= -##PEDIGREE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap index 903f2959..d4143387 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap +++ 
b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap @@ -21,6 +21,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 ##SAMPLE= ##SAMPLE= ##SAMPLE= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap.new deleted file mode 100644 index ddd3d292..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap.new +++ /dev/null @@ -1,61 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 338 -expression: "std::fs::read_to_string(out_path_str)?" 
---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh38 -##SAMPLE= -##SAMPLE= -##SAMPLE= -##PEDIGREE= -##PEDIGREE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap index c4a97f25..9fa5359b 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap @@ -21,6 +21,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 ##SAMPLE= ##PEDIGREE= ##x-varfish-version= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap.new deleted file mode 100644 index 23fdead5..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap.new +++ /dev/null @@ -1,57 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 338 -expression: "std::fs::read_to_string(out_path_str)?" ---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh38 -##SAMPLE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap index c95b5cb6..36909b99 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap +++ 
b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap @@ -21,6 +21,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 ##SAMPLE= ##PEDIGREE= ##x-varfish-version= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap.new deleted file mode 100644 index bfca1a65..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap.new +++ /dev/null @@ -1,57 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 338 -expression: "std::fs::read_to_string(out_path_str)?" 
---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh38 -##SAMPLE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap index ed1dcf2c..75470d21 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap @@ -21,6 +21,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 ##SAMPLE= ##PEDIGREE= ##x-varfish-version= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap.new deleted file mode 100644 index 674093e8..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap.new +++ /dev/null @@ -1,57 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 338 -expression: "std::fs::read_to_string(out_path_str)?" ---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh38 -##SAMPLE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap index 68d0312d..18f3f237 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap @@ -21,6 +21,10 @@ expression: 
"std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 ##SAMPLE= ##PEDIGREE= ##x-varfish-version= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap.new deleted file mode 100644 index b39aa237..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap.new +++ /dev/null @@ -1,57 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 338 -expression: "std::fs::read_to_string(out_path_str)?" ---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh38 -##SAMPLE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap index 7f41b458..ac3f69e0 100644 --- 
a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap @@ -21,6 +21,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 ##SAMPLE= ##PEDIGREE= ##x-varfish-version= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap.new deleted file mode 100644 index 1523024c..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap.new +++ /dev/null @@ -1,57 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 338 -expression: "std::fs::read_to_string(out_path_str)?" 
---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh38 -##SAMPLE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap index a0c6692b..66c65b38 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap @@ -21,6 +21,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -46,6 +50,8 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##contig= ##contig= ##contig= +##fileDate=20230421 +##x-genome-build=GRCh38 ##SAMPLE= ##SAMPLE= ##SAMPLE= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap.new deleted file mode 100644 index c26832d9..00000000 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap.new +++ /dev/null @@ -1,61 +0,0 @@ ---- -source: src/strucvars/ingest/header.rs -assertion_line: 338 -expression: "std::fs::read_to_string(out_path_str)?" ---- -##fileformat=VCFv4.4 -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##INFO= -##FILTER= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##FORMAT= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##contig= -##fileDate=20230421 -##x-genome-build=GRCh38 -##SAMPLE= -##SAMPLE= -##SAMPLE= -##PEDIGREE= -##PEDIGREE= -##PEDIGREE= -##x-varfish-version= -##x-varfish-version= -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother - diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap similarity index 69% rename from src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap.new rename to src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap index 45f5f7ef..34116bdd 100644 --- 
a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap.new +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap @@ -1,6 +1,5 @@ --- source: src/strucvars/ingest/mod.rs -assertion_line: 213 expression: "std::fs::read_to_string(&args.path_out)?" --- ##fileformat=VCFv4.4 @@ -22,6 +21,10 @@ expression: "std::fs::read_to_string(&args.path_out)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -58,4 +61,10 @@ expression: "std::fs::read_to_string(&args.path_out)?" ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE +1 1000 . N . . END=1000;sv_uuid=5e831ca1-477e-9b21-1e3a-ba7a1f21d500;callers=MELTv2.2.2;SVTYPE=INS:ME:ALU;CARRIERS_HET=0;CARRIERS_HOM_REF=0;CARRIERS_HOM_ALT=1;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 1/1:.:2:1:1:2:2:.:.:.:. +1 2000 . N . . END=2000;sv_uuid=37ae6bd2-3910-a1ee-09ac-4e992e019381;callers=MELTv2.2.2;SVTYPE=INS:ME:SVA;CARRIERS_HET=0;CARRIERS_HOM_REF=1;CARRIERS_HOM_ALT=0;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 0/0:.:4:4:0:4:1:.:.:.:. +1 3000 . N . . END=3000;sv_uuid=52f6d2dd-4397-0164-da3f-c7b517b61024;callers=MELTv2.2.2;SVTYPE=INS:ME:L1;CARRIERS_HET=0;CARRIERS_HOM_REF=1;CARRIERS_HOM_ALT=0;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 0/0:.:28:23:0:24:1:.:.:.:. +1 1283844 . N . . END=1284844;sv_uuid=a2242722-6377-cc86-7d51-ad3f130af08a;callers=DRAGEN_CNVv07.021.624.3.10.4;SVTYPE=CNV;CARRIERS_HET=1;CARRIERS_HOM_REF=0;CARRIERS_HOM_ALT=0;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 0/1:.:.:.:.:.:.:.:1:.:1 +1 1598413 . N . . 
END=1598580;sv_uuid=d13451de-7160-efa2-b230-76fd782de967;callers=DRAGEN_SVv07.021.624.3.10.4,MANTAv1.6.0;SVTYPE=DEL;CARRIERS_HET=0;CARRIERS_HOM_REF=0;CARRIERS_HOM_ALT=1;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 1/1:PASS:53:2:2:20:20:.:.:.:. +1 4124001 . N . . END=4125000;sv_uuid=ea9f11f8-dfb0-ca08-a881-0f9ea39c3a6a;callers=GATK_GCNVv4.3.0.0;SVTYPE=DEL;CARRIERS_HET=0;CARRIERS_HOM_REF=0;CARRIERS_HOM_ALT=1;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 1:.:.:.:.:.:.:.:1:.:1 diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap.new b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap similarity index 83% rename from src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap.new rename to src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap index 08d1786c..aaf8bc8d 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap.new +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap @@ -1,6 +1,5 @@ --- source: src/strucvars/ingest/mod.rs -assertion_line: 179 expression: "std::fs::read_to_string(&args.path_out)?" --- ##fileformat=VCFv4.4 @@ -22,6 +21,10 @@ expression: "std::fs::read_to_string(&args.path_out)?" ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= ##contig= ##contig= ##contig= @@ -59,4 +62,6 @@ expression: "std::fs::read_to_string(&args.path_out)?" ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother +1 586412 . N . . 
END=586439;sv_uuid=a2242722-6377-cc86-7d51-ad3f130af08a;callers=DELLYv1.1.3;SVTYPE=DEL;CARRIERS_HET=3;CARRIERS_HOM_REF=0;CARRIERS_HOM_ALT=0;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 0/1:PASS:59:0:0:11:4:.:.:.:. 0/1:PASS:22:0:0:8:2:.:.:.:. 0/1:LowQual:10:0:0:13:2:.:.:.:. +1 1224181 . N . . END=1225801;sv_uuid=d13451de-7160-efa2-b230-76fd782de967;callers=POPDELv1.1.2;SVTYPE=DEL;CARRIERS_HET=3;CARRIERS_HOM_REF=0;CARRIERS_HOM_ALT=0;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 0/1:.:4:.:.:.:.:.:.:.:. 0/1:.:7:.:.:.:.:.:.:.:. 0/1:.:7:.:.:.:.:.:.:.:. From 2633561a8aeb68b5dd38cb16e662cc33f902e8ba Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 9 Oct 2023 10:04:30 +0200 Subject: [PATCH 10/13] wip --- src/common.rs | 4 +-- src/strucvars/ingest/header.rs | 26 ++++++++++++------- src/strucvars/ingest/mod.rs | 8 ++++++ ...build_output_header_37@delly2-min.vcf.snap | 3 ++- ...d_output_header_37@dragen-cnv-min.vcf.snap | 3 ++- ...ld_output_header_37@dragen-sv-min.vcf.snap | 3 ++- ...__build_output_header_37@gcnv-min.vcf.snap | 3 ++- ..._build_output_header_37@manta-min.vcf.snap | 3 ++- ...__build_output_header_37@melt-min.vcf.snap | 3 ++- ...build_output_header_37@popdel-min.vcf.snap | 3 ++- ...build_output_header_38@delly2-min.vcf.snap | 3 ++- ...d_output_header_38@dragen-cnv-min.vcf.snap | 3 ++- ...ld_output_header_38@dragen-sv-min.vcf.snap | 3 ++- ...__build_output_header_38@gcnv-min.vcf.snap | 3 ++- ..._build_output_header_38@manta-min.vcf.snap | 3 ++- ...__build_output_header_38@melt-min.vcf.snap | 3 ++- ...build_output_header_38@popdel-min.vcf.snap | 3 ++- ...s__ingest__test__smoke_test_singleton.snap | 3 ++- ...ucvars__ingest__test__smoke_test_trio.snap | 3 ++- 19 files changed, 59 insertions(+), 27 deletions(-) diff --git a/src/common.rs b/src/common.rs index 87932a95..bd1c58f7 100644 --- a/src/common.rs +++ b/src/common.rs @@ -286,7 +286,7 @@ pub fn add_contigs_37( } builder = 
builder.insert( - "x-genome-build".parse()?, + "x-varfish-genome-build".parse()?, vcf::header::record::Value::from("GRCh37"), )?; @@ -354,7 +354,7 @@ pub fn add_contigs_38( } builder = builder.insert( - "x-genome-build".parse()?, + "x-varfish-genome-build".parse()?, vcf::header::record::Value::from("GRCh38"), )?; diff --git a/src/strucvars/ingest/header.rs b/src/strucvars/ingest/header.rs index 026f7b39..0d356d9e 100644 --- a/src/strucvars/ingest/header.rs +++ b/src/strucvars/ingest/header.rs @@ -39,6 +39,7 @@ pub fn build_output_header( genomebuild: GenomeRelease, file_date: &str, worker_version: &str, + case_uuid: &str, ) -> Result { use vcf::header::record::value::{ map::{format, info, Filter, Format, Info}, @@ -232,15 +233,20 @@ pub fn build_output_header( use vcf::header::record::value::map::Other; - let mut builder = builder.insert( - "x-varfish-version".parse()?, - vcf::header::record::Value::Map( - String::from("varfish-server-worker"), - Map::::builder() - .insert("Version".parse()?, worker_version) - .build()?, - ), - )?; + let mut builder = builder + .insert( + "x-varfish-case-uuid".parse()?, + vcf::header::record::Value::from(case_uuid), + )? 
+ .insert( + "x-varfish-version".parse()?, + vcf::header::record::Value::Map( + String::from("varfish-server-worker"), + Map::::builder() + .insert("Version".parse()?, worker_version) + .build()?, + ), + )?; for sv_caller in input_sv_callers.iter() { builder = builder.insert( @@ -293,6 +299,7 @@ mod test { crate::common::GenomeRelease::Grch37, "20230421", "x.y.z", + "d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c", )?; let out_path = tmpdir.join("out.vcf"); @@ -336,6 +343,7 @@ mod test { crate::common::GenomeRelease::Grch38, "20230421", "x.y.z", + "d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c", )?; let out_path = tmpdir.join("out.vcf"); diff --git a/src/strucvars/ingest/mod.rs b/src/strucvars/ingest/mod.rs index 36f2949f..8a0eff00 100644 --- a/src/strucvars/ingest/mod.rs +++ b/src/strucvars/ingest/mod.rs @@ -50,6 +50,9 @@ pub struct Args { /// Value to write to `##fileDate`. #[arg(long)] pub file_date: String, + /// Value to write out for `##x-varfish-case-uuid`. + #[arg(long)] + pub case_uuid: String, } /// Wrapper around noodle's VCF writer that adjusts the record for the worker. 
@@ -209,6 +212,7 @@ pub fn run(args_common: &crate::common::Args, args: &Args) -> Result<(), anyhow: args.genomebuild, &args.file_date, worker_version(), + &args.case_uuid, ) .map_err(|e| anyhow::anyhow!("problem building output header: {}", e))?; @@ -265,6 +269,7 @@ mod test { slack_ins: 50, rng_seed: Some(42), file_date: String::from("20230421"), + case_uuid: String::from("d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c"), }; super::run(&args_common, &args)?; @@ -300,6 +305,7 @@ mod test { slack_ins: 50, rng_seed: Some(42), file_date: String::from("20230421"), + case_uuid: String::from("d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c"), }; super::run(&args_common, &args)?; @@ -333,6 +339,7 @@ mod test { slack_ins: 50, rng_seed: Some(42), file_date: String::from("20230421"), + case_uuid: String::from("d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c"), }; super::run(&args_common, &args)?; @@ -370,6 +377,7 @@ mod test { slack_ins: 50, rng_seed: Some(42), file_date: String::from("20230421"), + case_uuid: String::from("d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c"), }; super::run(&args_common, &args)?; diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap index ae990259..415b7bb3 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap @@ -51,13 +51,14 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh37 +##x-varfish-genome-build=GRCh37 ##SAMPLE= ##SAMPLE= ##SAMPLE= ##PEDIGREE= ##PEDIGREE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap index c5bd3447..05d1b168 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap @@ -51,9 +51,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh37 +##x-varfish-genome-build=GRCh37 ##SAMPLE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap index 476e6802..b7f2cc0a 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap @@ -51,9 +51,10 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh37 +##x-varfish-genome-build=GRCh37 ##SAMPLE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap index 39eb3fc8..a856da8d 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap @@ -51,9 +51,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh37 +##x-varfish-genome-build=GRCh37 ##SAMPLE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap index 26f45c68..023275e9 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap @@ -51,9 +51,10 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh37 +##x-varfish-genome-build=GRCh37 ##SAMPLE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap index f484e76e..81fd4659 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap @@ -51,9 +51,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh37 +##x-varfish-genome-build=GRCh37 ##SAMPLE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap index 5fb65925..32daba9b 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap @@ -51,13 +51,14 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh37 +##x-varfish-genome-build=GRCh37 ##SAMPLE= ##SAMPLE= ##SAMPLE= ##PEDIGREE= ##PEDIGREE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap index d4143387..6754545e 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap @@ -51,13 +51,14 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh38 +##x-varfish-genome-build=GRCh38 ##SAMPLE= ##SAMPLE= ##SAMPLE= ##PEDIGREE= ##PEDIGREE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap index 9fa5359b..7713e413 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap @@ -51,9 +51,10 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh38 +##x-varfish-genome-build=GRCh38 ##SAMPLE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap index 36909b99..23553354 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap @@ -51,9 +51,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh38 +##x-varfish-genome-build=GRCh38 ##SAMPLE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap index 75470d21..c88330d4 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap @@ -51,9 +51,10 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh38 +##x-varfish-genome-build=GRCh38 ##SAMPLE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap index 18f3f237..d0ebbb13 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap @@ -51,9 +51,10 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh38 +##x-varfish-genome-build=GRCh38 ##SAMPLE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap index ac3f69e0..f777ca96 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap @@ -51,9 +51,10 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh38 +##x-varfish-genome-build=GRCh38 ##SAMPLE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap index 66c65b38..4ee045e4 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap @@ -51,13 +51,14 @@ expression: "std::fs::read_to_string(out_path_str)?" ##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh38 +##x-varfish-genome-build=GRCh38 ##SAMPLE= ##SAMPLE= ##SAMPLE= ##PEDIGREE= ##PEDIGREE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap index 34116bdd..84649a84 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap @@ -51,9 +51,10 @@ expression: "std::fs::read_to_string(&args.path_out)?" 
##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh37 +##x-varfish-genome-build=GRCh37 ##SAMPLE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= ##x-varfish-version= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap index aaf8bc8d..cbf8993a 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap @@ -51,13 +51,14 @@ expression: "std::fs::read_to_string(&args.path_out)?" ##contig= ##contig= ##fileDate=20230421 -##x-genome-build=GRCh37 +##x-varfish-genome-build=GRCh37 ##SAMPLE= ##SAMPLE= ##SAMPLE= ##PEDIGREE= ##PEDIGREE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= ##x-varfish-version= From 429428b1fe934281c0ff71f07de562f6d405c386 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 9 Oct 2023 12:55:22 +0200 Subject: [PATCH 11/13] wip --- README.md | 11 + src/strucvars/ingest/header.rs | 12 + src/strucvars/ingest/mod.rs | 214 +++++++++++++++++- ...build_output_header_37@delly2-min.vcf.snap | 2 + ...d_output_header_37@dragen-cnv-min.vcf.snap | 2 + ...ld_output_header_37@dragen-sv-min.vcf.snap | 2 + ...__build_output_header_37@gcnv-min.vcf.snap | 2 + ..._build_output_header_37@manta-min.vcf.snap | 2 + ...__build_output_header_37@melt-min.vcf.snap | 2 + ...build_output_header_37@popdel-min.vcf.snap | 2 + ...build_output_header_38@delly2-min.vcf.snap | 2 + ...d_output_header_38@dragen-cnv-min.vcf.snap | 2 + ...ld_output_header_38@dragen-sv-min.vcf.snap | 2 + ...__build_output_header_38@gcnv-min.vcf.snap | 2 + ..._build_output_header_38@manta-min.vcf.snap | 2 + 
...__build_output_header_38@melt-min.vcf.snap | 2 + ...build_output_header_38@popdel-min.vcf.snap | 2 + ...s__ingest__test__smoke_test_singleton.snap | 14 +- ...ucvars__ingest__test__smoke_test_trio.snap | 7 +- tests/strucvars/ingest/delly2-min.vcf | 1 + 20 files changed, 276 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 20093a8c..49cce8e5 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,7 @@ The following `INFO` fields are written: - `SVTYPE` -- type of the variant, one of ``, ``, ``, ``, `BND` - `SVLEN` -- absolute length of the SV for linear variants, `.` for non-linear variants - `SVCLAIM` -- specificaton of `D` (change in abundance), `J` (novel junction), or `DJ` (both change in abundance and novel junction) +- `callers` -- (non-standard field), list of callers that called the variant - `chr2` -- (non-standard field), second chromosome for BND variants - `annsv` -- (non-standard field), annotation of the variant effect on each affected gene @@ -199,6 +200,7 @@ Overall, the command will emit the following header rows in addition to the `##c ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -212,12 +214,20 @@ Overall, the command will emit the following header rows in addition to the `##c ##FORMAT= ##FORMAT= ##FORMAT= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##fileDate=20230421 +##x-varfish-genome-build=GRCh37 ##SAMPLE= ##SAMPLE= ##SAMPLE= ##PEDIGREE= ##PEDIGREE= ##PEDIGREE= +##x-varfish-case-uuid=d2bad2ec-a75d-44b9-bd0a-83a3f1331b7c ##x-varfish-version= ##x-varfish-version= ``` @@ -225,6 +235,7 @@ Overall, the command will emit the following header rows in addition to the `##c > [!NOTE] > The `strucvars ingest` step does not perform any annotation. > It only merges the input VCF files from multiple callers (all files must have the same samples) and converts them into the internal format. +> The `INFO/annsv` field is filled by `strucvars query`. 
# Developer Information diff --git a/src/strucvars/ingest/header.rs b/src/strucvars/ingest/header.rs index 0d356d9e..52c84e08 100644 --- a/src/strucvars/ingest/header.rs +++ b/src/strucvars/ingest/header.rs @@ -74,6 +74,14 @@ pub fn build_output_header( vcf::record::info::field::key::SV_CLAIM, Map::::from(&vcf::record::info::field::key::SV_CLAIM), ) + .add_info( + "callers".parse()?, + Map::::new( + Number::Unknown, + info::Type::String, + "Callers that called the variant", + ), + ) .add_info( "chr2".parse()?, Map::::new( @@ -162,6 +170,10 @@ pub fn build_output_header( .add_alternative_allele("DEL".parse()?, Map::::new("Deletion")) .add_alternative_allele("DUP".parse()?, Map::::new("Duplication")) .add_alternative_allele("INS".parse()?, Map::::new("Insertion")) + .add_alternative_allele( + "CNV".parse()?, + Map::::new("Copy Number Variation"), + ) .add_alternative_allele("INV".parse()?, Map::::new("Inversion")); let mut builder = match genomebuild { diff --git a/src/strucvars/ingest/mod.rs b/src/strucvars/ingest/mod.rs index 8a0eff00..c804e3b4 100644 --- a/src/strucvars/ingest/mod.rs +++ b/src/strucvars/ingest/mod.rs @@ -76,11 +76,219 @@ impl mehari::annotate::seqvars::AnnotatedVcfWriter for WriterWrapper { fn write_record( &mut self, header: &vcf::Header, - record: &vcf::Record, + input_record: &vcf::Record, ) -> Result<(), anyhow::Error> { - eprintln!("foo"); + // copy over CHROM, POS, REF + let mut builder = vcf::Record::builder() + .set_chromosome(input_record.chromosome().clone()) + .set_position(input_record.position()) + .set_reference_bases(input_record.reference_bases().clone()); + + // copy over first ALT allele, remove any SV sub types + if input_record.alternate_bases().len() != 1 { + anyhow::bail!( + "unexpected number of ALT alleles (should be ==1) in: {:?}", + input_record.alternate_bases() + ); + } + let alt_0 = &input_record.alternate_bases()[0]; + let sv_type; + let bnd; + match alt_0 { + vcf::record::alternate_bases::Allele::Breakend(bnd_string) 
=> { + sv_type = "BND".parse()?; + builder = builder + .set_alternate_bases(vcf::record::AlternateBases::from(vec![alt_0.clone()])); + bnd = Some( + mehari::annotate::strucvars::bnd::Breakend::from_ref_alt_str( + &format!("{}", input_record.reference_bases()), + bnd_string, + )?, + ); + } + vcf::record::alternate_bases::Allele::Symbol(symbol) => match symbol { + vcf::record::alternate_bases::allele::Symbol::StructuralVariant(sv) => { + sv_type = sv.ty(); + builder = builder + .set_alternate_bases(vcf::record::AlternateBases::from( + vec![vcf::record::alternate_bases::Allele::Symbol( + vcf::record::alternate_bases::allele::Symbol::StructuralVariant( + vcf::record::alternate_bases::allele::symbol::structural_variant::StructuralVariant::from( + sv.ty() + ) + ) + )] + )); + bnd = None; + } + _ => anyhow::bail!("unexpected symbolic allele: {:?}", &symbol), + }, + _ => anyhow::bail!("unexpected alternate base type: {:?}", &alt_0), + } + + // copy over FORMAT tags, all except FT + let mut keys_with_value = std::collections::HashSet::::new(); + let output_format_values = input_record + .genotypes() + .values() + .map(|g| { + g.keys() + .iter() + .zip(g.values().iter()) + .filter(|(k, _)| k.as_ref() != "FT") + .map(|(k, v)| { + if v.is_some() { + keys_with_value.insert(k.as_ref().to_string()); + } + + v.clone() + }) + .collect::>() + }) + .collect::>(); + let output_keys = vcf::record::genotypes::Keys::try_from( + input_record + .genotypes() + .keys() + .iter() + .filter(|k| k.as_ref() != "FT") + .cloned() + .map(|k| { + if k.as_ref() == "CN" { + "cn".parse().expect("invalid key: cn") + } else { + k + } + }) + .collect::>(), + )?; + builder = builder.set_genotypes(vcf::record::Genotypes::new( + output_keys, + output_format_values, + )); + + // copy over INFO tags + // Note: annsv will be added only in "strucvars query" + let mut info: noodles_vcf::record::Info = Default::default(); + match sv_type { + 
vcf::record::alternate_bases::allele::symbol::structural_variant::Type::Deletion | + vcf::record::alternate_bases::allele::symbol::structural_variant::Type::Duplication | + vcf::record::alternate_bases::allele::symbol::structural_variant::Type::CopyNumberVariation => { + let claim = if keys_with_value.contains("pev") || keys_with_value.contains("srv") { + "DJ" + } else { + "D" + }; + info.insert( + vcf::record::info::field::key::SV_CLAIM, + Some(vcf::record::info::field::Value::Array( + vcf::record::info::field::value::Array::String(vec![Some(claim.to_string())]), + ) + )); + + } + vcf::record::alternate_bases::allele::symbol::structural_variant::Type::Insertion | + vcf::record::alternate_bases::allele::symbol::structural_variant::Type::Inversion | + vcf::record::alternate_bases::allele::symbol::structural_variant::Type::Breakend => { + info.insert( + vcf::record::info::field::key::SV_CLAIM, + Some(vcf::record::info::field::Value::Array( + vcf::record::info::field::value::Array::String(vec![Some("J".to_string())]), + ) + )); + }, + } + info.insert( + vcf::record::info::field::key::SV_TYPE, + Some(vcf::record::info::field::Value::String(sv_type.to_string())), + ); + if let Some(Some(vcf::record::info::field::Value::Integer(end))) = input_record + .info() + .get(&vcf::record::info::field::key::END_POSITION) + { + info.insert( + vcf::record::info::field::key::END_POSITION, + Some(vcf::record::info::field::Value::Integer(*end)), + ); + + if sv_type + == vcf::record::alternate_bases::allele::symbol::structural_variant::Type::Breakend + { + info.insert( + "chr2".parse()?, + Some(vcf::record::info::field::value::Value::String( + bnd.expect("must be set here").chrom.clone(), + )), + ); + } else { + let pos: usize = input_record.position().into(); + let sv_len: usize = *end as usize - pos + 1; + info.insert( + vcf::record::info::field::key::SV_LENGTHS, + Some(vcf::record::info::field::Value::Array( + vcf::record::info::field::value::Array::Integer(vec![Some(sv_len as 
i32)]), + )), + ); + } + } + + fn map_caller(caller: &str) -> Result, anyhow::Error> { + if caller.starts_with("DELLYv") { + Ok(Some("Delly".to_string())) + } else if caller.starts_with("DRAGEN_CNVv") { + Ok(Some("DragenCnv".to_string())) + } else if caller.starts_with("DRAGEN_SVv") { + Ok(Some("DragenSv".to_string())) + } else if caller.starts_with("GATK_GCNVv") { + Ok(Some("Gcnv".to_string())) + } else if caller.starts_with("MANTAv") { + Ok(Some("Manta".to_string())) + } else if caller.starts_with("POPDELv") { + Ok(Some("Popdel".to_string())) + } else if caller.starts_with("MELTv") { + Ok(Some("Melt".to_string())) + } else { + anyhow::bail!("unknown caller: {}", caller) + } + } + + let key_callers: vcf::record::info::field::Key = "callers".parse()?; + eprintln!("callers = {:?}", &input_record.info().get(&key_callers)); + if let Some(Some(callers)) = input_record.info().get(&key_callers) { + if let vcf::record::info::field::Value::Array( + vcf::record::info::field::value::Array::String(callers), + ) = callers + { + let output_callers = callers + .iter() + .flatten() + .map(|caller| map_caller(caller)) + .collect::, _>>()?; + info.insert( + key_callers, + Some(vcf::record::info::field::Value::Array( + vcf::record::info::field::value::Array::String(output_callers), + )), + ); + } else if let vcf::record::info::field::Value::String(caller) = callers { + let output_callers = vec![map_caller(caller)?]; + info.insert( + key_callers, + Some(vcf::record::info::field::Value::Array( + vcf::record::info::field::value::Array::String(output_callers), + )), + ); + } + } else { + anyhow::bail!("no callers INFO tag found"); + } + + builder = builder.set_info(info); + + let record = builder.build()?; + self.inner - .write_record(header, record) + .write_record(header, &record) .map_err(|e| anyhow::anyhow!("Error writing VCF record: {}", e)) } } diff --git 
a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap index 415b7bb3..9bb5424a 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@delly2-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap index 05d1b168..be89862c 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-cnv-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap index b7f2cc0a..fbe1cdd8 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@dragen-sv-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap index a856da8d..b23f0a1f 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@gcnv-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap index 023275e9..4cb5bd71 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@manta-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap index 81fd4659..3074596a 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@melt-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap index 32daba9b..b0312a31 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_37@popdel-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap index 6754545e..c47cfa62 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@delly2-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap index 7713e413..c2954ee3 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-cnv-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap index 23553354..6136c98b 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@dragen-sv-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap index c88330d4..2f68b496 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@gcnv-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap index d0ebbb13..39bcd070 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@manta-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap index f777ca96..9c15deb7 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@melt-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap index 4ee045e4..484bcf91 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__header__test__build_output_header_38@popdel-min.vcf.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(out_path_str)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(out_path_str)?" 
##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap index 84649a84..0c1e99fe 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_singleton.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(&args.path_out)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(&args.path_out)?" ##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= @@ -62,10 +64,10 @@ expression: "std::fs::read_to_string(&args.path_out)?" ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE -1 1000 . N . . END=1000;sv_uuid=5e831ca1-477e-9b21-1e3a-ba7a1f21d500;callers=MELTv2.2.2;SVTYPE=INS:ME:ALU;CARRIERS_HET=0;CARRIERS_HOM_REF=0;CARRIERS_HOM_ALT=1;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 1/1:.:2:1:1:2:2:.:.:.:. -1 2000 . N . . END=2000;sv_uuid=37ae6bd2-3910-a1ee-09ac-4e992e019381;callers=MELTv2.2.2;SVTYPE=INS:ME:SVA;CARRIERS_HET=0;CARRIERS_HOM_REF=1;CARRIERS_HOM_ALT=0;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 0/0:.:4:4:0:4:1:.:.:.:. -1 3000 . N . . END=3000;sv_uuid=52f6d2dd-4397-0164-da3f-c7b517b61024;callers=MELTv2.2.2;SVTYPE=INS:ME:L1;CARRIERS_HET=0;CARRIERS_HOM_REF=1;CARRIERS_HOM_ALT=0;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 0/0:.:28:23:0:24:1:.:.:.:. -1 1283844 . N . . 
END=1284844;sv_uuid=a2242722-6377-cc86-7d51-ad3f130af08a;callers=DRAGEN_CNVv07.021.624.3.10.4;SVTYPE=CNV;CARRIERS_HET=1;CARRIERS_HOM_REF=0;CARRIERS_HOM_ALT=0;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 0/1:.:.:.:.:.:.:.:1:.:1 -1 1598413 . N . . END=1598580;sv_uuid=d13451de-7160-efa2-b230-76fd782de967;callers=DRAGEN_SVv07.021.624.3.10.4,MANTAv1.6.0;SVTYPE=DEL;CARRIERS_HET=0;CARRIERS_HOM_REF=0;CARRIERS_HOM_ALT=1;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 1/1:PASS:53:2:2:20:20:.:.:.:. -1 4124001 . N . . END=4125000;sv_uuid=ea9f11f8-dfb0-ca08-a881-0f9ea39c3a6a;callers=GATK_GCNVv4.3.0.0;SVTYPE=DEL;CARRIERS_HET=0;CARRIERS_HOM_REF=0;CARRIERS_HOM_ALT=1;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 1:.:.:.:.:.:.:.:1:.:1 +1 1000 . N . . SVCLAIM=J;SVTYPE=INS;END=1000;SVLEN=1;callers=Melt GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 1/1:2:1:1:2:2:.:.:.:. +1 2000 . N . . SVCLAIM=J;SVTYPE=INS;END=2000;SVLEN=1;callers=Melt GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 0/0:4:4:0:4:1:.:.:.:. +1 3000 . N . . SVCLAIM=J;SVTYPE=INS;END=3000;SVLEN=1;callers=Melt GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 0/0:28:23:0:24:1:.:.:.:. +1 1283844 . N . . SVCLAIM=D;SVTYPE=CNV;END=1284844;SVLEN=1001;callers=DragenCnv GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 0/1:.:.:.:.:.:.:1:.:1 +1 1598413 . N . . SVCLAIM=DJ;SVTYPE=DEL;END=1598580;SVLEN=168;callers=DragenSv GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 1/1:53:2:2:20:20:.:.:.:. +1 4124001 . N . . 
SVCLAIM=D;SVTYPE=DEL;END=4125000;SVLEN=1000;callers=Gcnv GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 1:.:.:.:.:.:.:1:.:1 diff --git a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap index cbf8993a..cf0405b1 100644 --- a/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap +++ b/src/strucvars/ingest/snapshots/varfish_server_worker__strucvars__ingest__test__smoke_test_trio.snap @@ -8,6 +8,7 @@ expression: "std::fs::read_to_string(&args.path_out)?" ##INFO= ##INFO= ##INFO= +##INFO= ##INFO= ##INFO= ##FILTER= @@ -24,6 +25,7 @@ expression: "std::fs::read_to_string(&args.path_out)?" ##ALT= ##ALT= ##ALT= +##ALT= ##ALT= ##contig= ##contig= @@ -63,6 +65,7 @@ expression: "std::fs::read_to_string(&args.path_out)?" ##x-varfish-version= ##x-varfish-version= #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother -1 586412 . N . . END=586439;sv_uuid=a2242722-6377-cc86-7d51-ad3f130af08a;callers=DELLYv1.1.3;SVTYPE=DEL;CARRIERS_HET=3;CARRIERS_HOM_REF=0;CARRIERS_HOM_ALT=0;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 0/1:PASS:59:0:0:11:4:.:.:.:. 0/1:PASS:22:0:0:8:2:.:.:.:. 0/1:LowQual:10:0:0:13:2:.:.:.:. -1 1224181 . N . . END=1225801;sv_uuid=d13451de-7160-efa2-b230-76fd782de967;callers=POPDELv1.1.2;SVTYPE=DEL;CARRIERS_HET=3;CARRIERS_HOM_REF=0;CARRIERS_HOM_ALT=0;CARRIERS_HEMI_REF=0;CARRIERS_HEMI_ALT=0 GT:FT:GQ:pec:pev:src:srv:amq:CN:anc:pc 0/1:.:4:.:.:.:.:.:.:.:. 0/1:.:7:.:.:.:.:.:.:.:. 0/1:.:7:.:.:.:.:.:.:.:. +1 586412 . N . . SVCLAIM=DJ;SVTYPE=DEL;END=586439;SVLEN=28;callers=Delly GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 0/1:59:0:0:11:4:.:.:.:. 0/1:22:0:0:8:2:.:.:.:. 0/1:10:0:0:13:2:.:.:.:. +1 1224181 . N . . SVCLAIM=D;SVTYPE=DEL;END=1225801;SVLEN=1621;callers=Popdel GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 0/1:4:.:.:.:.:.:.:.:. 0/1:7:.:.:.:.:.:.:.:. 
0/1:7:.:.:.:.:.:.:.:. +2 321681 . N G]17:198982] . . SVCLAIM=J;SVTYPE=BND;END=198982;chr2=17;callers=Delly GT:GQ:pec:pev:src:srv:amq:cn:anc:pc 0/1:.:0:.:0:.:.:.:.:. 0/1:.:0:.:0:.:.:.:.:. 0/1:.:0:.:0:.:.:.:.:. diff --git a/tests/strucvars/ingest/delly2-min.vcf b/tests/strucvars/ingest/delly2-min.vcf index b6588dd0..15da9e54 100644 --- a/tests/strucvars/ingest/delly2-min.vcf +++ b/tests/strucvars/ingest/delly2-min.vcf @@ -130,3 +130,4 @@ ##bcftools_viewCommand=view -O z -o work/bwa.delly2.SAMPLE-N1-DNA1-WGS1/out/bwa.delly2.SAMPLE-N1-DNA1-WGS1.vcf.gz /data/gpfs-1/users/holtgrem_c/scratch/tmp/hpc-cpu-164/20220829/tmp.jo3WnHhrQp/cwd/1.bcf; Date=Mon Aug 29 16:04:36 2022 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT index father mother 1 586412 DEL00000004 C 92 PASS PRECISE;SVTYPE=DEL;SVMETHOD=EMBL.DELLYv1.1.3;END=586439;PE=0;MAPQ=0;CT=3to5;CIPOS=-20,20;CIEND=-20,20;SRMAPQ=23;INSLEN=0;HOMLEN=20;SR=4;SRQ=1;CONSENSUS=CTCAGGGTGTTCGGGATAAAGAAGACTCAGGAAGACAAGTATGAAGCATAATCTGTGACATTATTGATATCTTCCTGAAGAACATAATTCCTGCCTACCATCAACAAGCATCAATACTTTCTACCAGCTATTCTCAACCCTCATCATCGGAAGAGACAGACACTGACTGTGTCAAA;CE=1.96018;AC=3;AN=6 GT:GL:GQ:FT:RCL:RC:RCR:RDCN:DR:DV:RR:RV 0/1:-5.90527,0,-14.2974:59:PASS:203:373:203:2:0:0:7:4 0/1:-2.23535,0,-11.3961:22:PASS:188:321:130:2:0:0:6:2 0/1:-1.0313,0,-19.791:10:LowQual:327:515:247:2:0:0:11:2 +2 321681 . 
G G]17:198982] 6 PASS SVTYPE=BND GT 0/1 0/1 0/1 From ec82649c88f5298eddc9fb62ffc29938c1c3235d Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 9 Oct 2023 13:09:12 +0200 Subject: [PATCH 12/13] wip --- Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3ac0d36b..92ae0c9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,8 +29,7 @@ hgvs = "0.11" indexmap = { version = "2.0", features = ["serde"] } itertools = "0.11" log = "0.4" -# mehari = "0.11" -mehari = { path = "../mehari" } +mehari = "0.12" multimap = "0.9" procfs = "0.15" prost = "0.12" From b217519eca9bde5b213657914481221d1e7680f8 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 9 Oct 2023 13:23:46 +0200 Subject: [PATCH 13/13] wip --- Cargo.lock | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 08b0a313..4e1b6f7b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1804,7 +1804,9 @@ dependencies = [ [[package]] name = "mehari" -version = "0.11.0" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0065ee55e312f43f41238309e4a311b2b16d02ee7ce7765bcb39a70e1c06417b" dependencies = [ "actix-web", "annonars",