Skip to content

Commit

Permalink
feat: transform "db mk-inhouse" to "strucvars aggregate" (#212) (#217)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Oct 10, 2023
1 parent 7731958 commit b51f027
Show file tree
Hide file tree
Showing 26 changed files with 558 additions and 377 deletions.
148 changes: 148 additions & 0 deletions src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,49 @@ impl std::str::FromStr for GenomeRelease {
}
}

/// Helper type for encoding genotypes in parsing.
///
/// Only the zygosity class of a call is kept; the `FromStr` implementation in
/// this module maps genotype strings such as `"0/1"` onto these variants.
/// The type is a plain field-less enum, so it is cheap to copy, compare, hash
/// and debug-print.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Genotype {
    /// Homozygous reference (hom. ref.).
    HomRef,
    /// Heterozygous (het.).
    Het,
    /// Homozygous alternative (hom. alt.).
    HomAlt,
}

impl std::str::FromStr for Genotype {
    type Err = anyhow::Error;

    /// Parse a genotype string into a [`Genotype`].
    ///
    /// Accepted spellings (unphased `/`, phased `|`, and single-allele calls):
    ///
    /// - `"0/0"`, `"0|0"`, `"0"` => `HomRef`
    /// - `"0/1"`, `"1/0"`, `"0|1"`, `"1|0"` => `Het`
    /// - `"1/1"`, `"1|1"`, `"1"` => `HomAlt`
    ///
    /// # Errors
    ///
    /// Any other string is rejected with an "invalid genotype value" error.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let genotype = match s {
            "0/0" | "0|0" | "0" => Self::HomRef,
            "0/1" | "1/0" | "0|1" | "1|0" => Self::Het,
            "1/1" | "1|1" | "1" => Self::HomAlt,
            other => return Err(anyhow::anyhow!("invalid genotype value: {:?}", other)),
        };
        Ok(genotype)
    }
}

/// Coarse classification of a chromosome/contig.
///
/// Only the sex chromosomes are told apart; every other contig is lumped
/// together as [`Chrom::Auto`].
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum Chrom {
    /// Any contig that is not recognized as X or Y.
    ///
    /// This also covers the mitochondrial contig, but that does not matter
    /// for carrier computation.
    Auto,
    /// Chromosome X.
    X,
    /// Chromosome Y.
    Y,
}

impl std::str::FromStr for Chrom {
    type Err = anyhow::Error;

    /// Classify a contig name as X, Y, or "everything else".
    ///
    /// Both the bare (`"X"`, `"Y"`) and the `chr`-prefixed (`"chrX"`,
    /// `"chrY"`) spellings are recognized as sex chromosomes; previously the
    /// prefixed names fell through to `Auto`. Every other name maps to
    /// `Chrom::Auto`.
    ///
    /// # Errors
    ///
    /// This implementation never returns an error; the `Result` only exists
    /// to satisfy the `FromStr` contract.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Drop an optional "chr" prefix so both contig naming schemes agree.
        let name = s.strip_prefix("chr").unwrap_or(s);
        Ok(match name {
            "X" => Chrom::X,
            "Y" => Chrom::Y,
            _ => Chrom::Auto,
        })
    }
}

/// The version of `varfish-server-worker` package.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");

Expand Down Expand Up @@ -293,6 +336,99 @@ pub fn add_contigs_37(
Ok(builder)
}

/// Build the pedigree and read the case UUID from a VCF header.
///
/// Three kinds of "other" header records are consulted, in this order:
///
/// 1. `SAMPLE`: one individual is created per entry, taking `Sex` and
///    `Disease` from the entry's fields; the family is always set to `"FAM"`.
/// 2. `PEDIGREE`: the optional `Father` / `Mother` fields of each entry are
///    copied onto the individual with the same name.
/// 3. `x-varfish-case-uuid`: the first line is parsed as the case UUID.
///
/// A `SAMPLE` or `PEDIGREE` record that is not a structured collection is
/// skipped without error, and an `x-varfish-case-uuid` record that is not an
/// unstructured collection leaves the returned UUID nil.
///
/// # Errors
///
/// Returns an error if any of the three records is missing, if a sample lacks
/// `Sex` or `Disease` or carries an unknown value for them, if a `PEDIGREE`
/// entry names an individual that was not created from `SAMPLE`, or if the
/// case UUID line is absent or cannot be parsed.
pub fn extract_pedigree_and_case_uuid(
    header: &vcf::Header,
) -> Result<(mehari::ped::PedigreeByName, uuid::Uuid), anyhow::Error> {
    let mut pedigree = mehari::ped::PedigreeByName::default();

    // Step 1: create the individuals from the SAMPLE entries.
    let sample_record = header
        .other_records()
        .get("SAMPLE")
        .ok_or_else(|| anyhow::anyhow!("no SAMPLE record in VCF header"))?;
    if let vcf::header::record::value::Collection::Structured(samples) = sample_record {
        for (name, entry) in samples.iter() {
            let fields = entry.other_fields();

            let raw_sex: &str = fields
                .get("Sex")
                .ok_or_else(|| anyhow::anyhow!("no Sex field in SAMPLE header?"))?
                .as_ref();
            let sex = match raw_sex {
                "Male" => mehari::ped::Sex::Male,
                "Female" => mehari::ped::Sex::Female,
                "Unknown" => mehari::ped::Sex::Unknown,
                other => return Err(anyhow::anyhow!("invalid value for Sex: {}", other)),
            };

            let raw_disease: &str = fields
                .get("Disease")
                .ok_or_else(|| anyhow::anyhow!("no Disease field in SAMPLE header?"))?
                .as_ref();
            let disease = match raw_disease {
                "Affected" => mehari::ped::Disease::Affected,
                "Unaffected" => mehari::ped::Disease::Unaffected,
                "Unknown" => mehari::ped::Disease::Unknown,
                other => return Err(anyhow::anyhow!("invalid value for Disease: {}", other)),
            };

            let individual = mehari::ped::Individual {
                family: "FAM".into(),
                name: name.clone(),
                sex,
                disease,
                ..Default::default()
            };
            pedigree.individuals.insert(name.clone(), individual);
        }
    }

    // Step 2: attach parents from the PEDIGREE entries; absent fields leave
    // the individual's parent untouched.
    let pedigree_record = header
        .other_records()
        .get("PEDIGREE")
        .ok_or_else(|| anyhow::anyhow!("no PEDIGREE record in VCF header"))?;
    if let vcf::header::record::value::Collection::Structured(relations) = pedigree_record {
        for (name, entry) in relations.iter() {
            let individual = pedigree.individuals.get_mut(name).ok_or_else(|| {
                anyhow::anyhow!("individual {} not found in SAMPLE header", name)
            })?;

            let fields = entry.other_fields();
            if let Some(father) = fields.get("Father") {
                individual.father = Some(father.clone());
            }
            if let Some(mother) = fields.get("Mother") {
                individual.mother = Some(mother.clone());
            }
        }
    }

    // Step 3: the case UUID is the first line of the unstructured record;
    // any other record shape yields the nil UUID.
    let uuid_record = header
        .other_records()
        .get("x-varfish-case-uuid")
        .ok_or_else(|| anyhow::anyhow!("no x-varfish-case-uuid record in VCF header"))?;
    let case_uuid = match uuid_record {
        vcf::header::record::value::Collection::Unstructured(lines) => lines
            .first()
            .ok_or_else(|| {
                anyhow::anyhow!("no x-varfish-case-uuid record in VCF header, but expected one")
            })?
            .parse::<uuid::Uuid>()
            .map_err(|e| {
                anyhow::anyhow!(
                    "could not parse x-varfish-case-uuid record in VCF header: {}",
                    e
                )
            })?,
        _ => uuid::Uuid::nil(),
    };

    Ok((pedigree, case_uuid))
}

/// Add contigs for GRCh38.
pub fn add_contigs_38(
builder: vcf::header::Builder,
Expand Down Expand Up @@ -377,6 +513,7 @@ where

#[cfg(test)]
mod test {
use noodles_vcf as vcf;
use std::io::Read;

#[test]
Expand Down Expand Up @@ -510,4 +647,15 @@ mod test {

Ok(())
}

// Snapshot test for `extract_pedigree_and_case_uuid`: reads the header of a
// fixture VCF and pins the extracted pedigree and case UUID with `insta`.
#[test]
fn extract_pedigree_snapshot() {
// Fixture path is relative; presumably resolved against the crate root when
// run through `cargo test` -- confirm.
let path = "tests/seqvars/aggregate/ingest.vcf";
let mut vcf_reader = vcf::reader::Builder.build_from_path(path).unwrap();
// Only the header is needed; no variant records are read.
let header = vcf_reader.read_header().unwrap();

let (pedigree, case_uuid) = super::extract_pedigree_and_case_uuid(&header).unwrap();
// NOTE(review): these are unnamed snapshots, so their stored names likely
// depend on the test name and on the order of these two calls -- confirm
// against the insta documentation before reordering or renaming.
insta::assert_debug_snapshot!(pedigree);
insta::assert_debug_snapshot!(case_uuid);
}
}
66 changes: 0 additions & 66 deletions src/db/mk_inhouse/input.rs

This file was deleted.

75 changes: 0 additions & 75 deletions src/db/mk_inhouse/output.rs

This file was deleted.

1 change: 0 additions & 1 deletion src/db/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
//! Code supporting the `db *` sub commands.

pub mod conf;
pub mod mk_inhouse;
pub mod pbs;
pub mod to_bin;
2 changes: 1 addition & 1 deletion src/db/to_bin/vardbs/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
use serde::Deserialize;
use tracing::error;

use crate::db::mk_inhouse::output::Record as InhouseDbRecord;
use crate::strucvars::aggregate::output::Record as InhouseDbRecord;
use crate::strucvars::query::schema::SvType;

/// dbVar database record as read from TSV file.
Expand Down
2 changes: 1 addition & 1 deletion src/db/to_bin/vardbs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ use thousands::Separable;

use crate::common::{build_chrom_map, trace_rss_now};
use crate::db;
use crate::db::mk_inhouse::output::Record as InhouseDbRecord;
use crate::db::pbs::{BackgroundDatabase, BgDbRecord};
use crate::strucvars::aggregate::output::Record as InhouseDbRecord;
use crate::strucvars::query::schema::SvType;

use self::input::InputRecord;
Expand Down
8 changes: 4 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ struct Db {
#[derive(Debug, Subcommand)]
enum DbCommands {
ToBin(db::to_bin::cli::Args),
MkInhouse(db::mk_inhouse::cli::Args),
}

/// Parsing of "sv *" sub commands.
Expand All @@ -67,6 +66,7 @@ struct Strucvars {
/// Enum supporting the parsing of "sv *" sub commands.
// Plain `//` comments are used on the variants below on purpose: with the
// `Subcommand` derive, `///` doc comments may be picked up as help text.
#[derive(Debug, Subcommand)]
enum StrucvarsCommands {
// Handled in `main` by calling `strucvars::aggregate::cli::run`.
Aggregate(strucvars::aggregate::cli::Args),
// Handled in `main` by calling `strucvars::ingest::run`.
Ingest(strucvars::ingest::Args),
// Presumably handled by a `run` function in `strucvars::query` -- the
// dispatch arm is not visible here; confirm.
Query(strucvars::query::Args),
}
Expand Down Expand Up @@ -112,9 +112,6 @@ fn main() -> Result<(), anyhow::Error> {
tracing::subscriber::with_default(collector, || {
match &cli.command {
Commands::Db(db) => match &db.command {
DbCommands::MkInhouse(args) => {
db::mk_inhouse::cli::run(&cli.common, args)?;
}
DbCommands::ToBin(args) => {
db::to_bin::cli::run(&cli.common, args)?;
}
Expand All @@ -131,6 +128,9 @@ fn main() -> Result<(), anyhow::Error> {
}
},
Commands::Strucvars(strucvars) => match &strucvars.command {
StrucvarsCommands::Aggregate(args) => {
strucvars::aggregate::cli::run(&cli.common, args)?;
}
StrucvarsCommands::Ingest(args) => {
strucvars::ingest::run(&cli.common, args)?;
}
Expand Down
Loading

0 comments on commit b51f027

Please sign in to comment.