From 2ee2e0a5410df4fe0860a881c233442903b2b8c6 Mon Sep 17 00:00:00 2001 From: Manuel Holtgrewe Date: Mon, 20 Nov 2023 08:46:02 +0100 Subject: [PATCH] feat: provide ClinVar SVs for rule sin Sections 4 (#8) --- src/strucvars/eval/mod.rs | 63 ++++++++++++++++++++++++- tests/data/strucvars/hi_ts/bootstrap.sh | 14 ++++++ 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/src/strucvars/eval/mod.rs b/src/strucvars/eval/mod.rs index ab4f9c8..efc43a0 100644 --- a/src/strucvars/eval/mod.rs +++ b/src/strucvars/eval/mod.rs @@ -9,7 +9,9 @@ use std::{path::Path, sync::Arc}; use annonars::common::spdi; use annonars::functional::cli::query::IntervalTrees as FunctionalIntervalTrees; +use annonars::clinvar_sv::cli::query::IntervalTrees as ClinvarSvIntervalTrees; use annonars::pbs::annonars::clinvar::v1::minimal::Record as ClinvarRecord; +use annonars::pbs::annonars::clinvar::v1::sv::Record as ClinvarSvRecord; use annonars::pbs::annonars::functional::v1::refseq::Record as FunctionalRecord; use hgvs::data::interface::Provider; use itertools::Itertools as _; @@ -44,7 +46,9 @@ pub struct Evaluator { /// The ClinVar minimal database. clinvar_db: Arc>, - /// The functional elements database. + /// The ClinVar SV database wrapped/indexed by `IntervalTrees`. + clinvar_sv: ClinvarSvIntervalTrees, + /// The functional elements database wrapped/indexed by `IntervalTrees`. functional: FunctionalIntervalTrees, } @@ -69,7 +73,7 @@ impl Evaluator { /// /// If anything goes wrong, it returns a generic `anyhow::Error`. #[allow(clippy::too_many_arguments)] - pub fn new( + pub fn new( assembly: biocommons_bioutils::assemblies::Assembly, path_tx_db: P1, path_hgnc: P2, @@ -79,6 +83,7 @@ impl Evaluator { path_gnomad_constraints: P6, path_clinvar_minimal: P7, path_functional: P8, + path_clinvar_sv: P9, ) -> Result where P1: AsRef, @@ -89,6 +94,7 @@ impl Evaluator { P6: AsRef, P7: AsRef, P8: AsRef, + P9: AsRef, { let provider = Self::load_provider(assembly, path_tx_db.as_ref()) .map_err(|e| anyhow::anyhow!("failed to load transcript database: {}", e))?; @@ -113,6 +119,16 @@ impl Evaluator { "clinvar_by_accession", ) .map_err(|e| anyhow::anyhow!("failed to open 'minimal' ClinVar RocksDB: {}", e))?; + + let (clinvar_sv_db, clinvar_sv_meta) = annonars::clinvar_sv::cli::query::open_rocksdb( + path_clinvar_sv.as_ref(), + "clinvar_sv", + "meta", + "clinvar_sv_by_rcv", + ).map_err(|e| anyhow::anyhow!("failed to open clinvar_sv RocksDB: {}", e))?; + let clinvar_sv = ClinvarSvIntervalTrees::with_db(clinvar_sv_db, "clinvar_sv", clinvar_sv_meta) + .map_err(|e| anyhow::anyhow!("failed to load ClinVar SV data: {}", e))?; + let (functional_db, functional_meta) = annonars::functional::cli::query::open_rocksdb( path_functional.as_ref(), "functional", @@ -132,6 +148,7 @@ impl Evaluator { decipher_hi_data, gnomad_constraint_data, clinvar_db, + clinvar_sv, functional, }) } @@ -418,6 +435,27 @@ impl Evaluator { .query(&spdi::Range::new(chrom.replace("chr", ""), start, stop)) .map_err(|e| anyhow::anyhow!("failed to query functional elements: {}", e)) } + + /// Query ClinVar SV records by overlap. + pub fn clinvar_sv_overlaps( + &self, + chrom: &str, + start: u32, + stop: u32, + ) -> Result, anyhow::Error> { + tracing::trace!( + "starting ClinVar SV query {}:{}-{}", + &chrom, + start, + stop + ); + let start = start as i32; + let stop = stop as i32; + + self.clinvar_sv + .query(&spdi::Range::new(chrom.replace("chr", ""), start, stop)) + .map_err(|e| anyhow::anyhow!("failed to query ClinVar SV db: {}", e)) + } } #[cfg(test)] @@ -438,6 +476,7 @@ pub mod test { "tests/data/strucvars/gnomad_constraints.tsv", "tests/data/strucvars/hi_ts/clinvar/rocksdb", "tests/data/strucvars/hi_ts/functional/rocksdb", + "tests/data/strucvars/hi_ts/clinvar-sv/rocksdb", ) .expect("could not initialize global evaluator") } @@ -496,6 +535,26 @@ pub mod test { Ok(()) } + /// Test internal working of `clinvar_sv_overlaps`. + #[tracing_test::traced_test] + #[rstest::rstest] + #[case("1", 8_018_845, 8_044_954, "VCV000007063")] + #[case("21", 1, 1, "almost-empty")] + fn clinvar_sv_overlaps( + #[case] chrom: &str, + #[case] start: u32, + #[case] stop: u32, + #[case] label: &str, + global_evaluator_37: &Evaluator, + ) -> Result<(), anyhow::Error> { + mehari::common::set_snapshot_suffix!("{}", label); + + let result = global_evaluator_37.clinvar_sv_overlaps(chrom, start, stop)?; + insta::assert_yaml_snapshot!(result); + + Ok(()) + } + /// Test `is_protein_coding`. #[tracing_test::traced_test] #[rstest::rstest] diff --git a/tests/data/strucvars/hi_ts/bootstrap.sh b/tests/data/strucvars/hi_ts/bootstrap.sh index 8dde625..d6d6551 100644 --- a/tests/data/strucvars/hi_ts/bootstrap.sh +++ b/tests/data/strucvars/hi_ts/bootstrap.sh @@ -77,6 +77,20 @@ annonars db-utils copy \ --path-out tests/data/strucvars/hi_ts/clinvar-empty/rocksdb \ --path-beds /dev/null +# -- clinvar-sv -- + +wget -O $TMPDIR/clinvar-data-extract-vars-20231112+0.12.0.tar.gz \ + https://github.com/bihealth/clinvar-data-jsonl/releases/download/clinvar-weekly-20231112/clinvar-data-extract-vars-20231112+0.12.0.tar.gz +tar -C $TMPDIR -xvf $TMPDIR/clinvar-data-extract-vars-20231112+0.12.0.tar.gz + +mkdir -p tests/data/strucvars/hi_ts/clinvar-sv +rm -rf tests/data/strucvars/hi_ts/clinvar-sv/rocksdb + +annonars clinvar-sv import \ + --genome-release grch37 \ + --path-out-rocksdb tests/data/strucvars/hi_ts/clinvar-sv/rocksdb \ + --path-in-jsonl $TMPDIR/clinvar-data-extract-vars-*/clinvar-variants-grch37-strucvars.jsonl.gz + # -- functional elements -- wget \