Skip to content

Commit

Permalink
feat: provide ClinVar SVs for rule sin Sections 4 (#8)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Nov 20, 2023
1 parent fab1712 commit 2ee2e0a
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 2 deletions.
63 changes: 61 additions & 2 deletions src/strucvars/eval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ use std::{path::Path, sync::Arc};

use annonars::common::spdi;
use annonars::functional::cli::query::IntervalTrees as FunctionalIntervalTrees;
use annonars::clinvar_sv::cli::query::IntervalTrees as ClinvarSvIntervalTrees;
use annonars::pbs::annonars::clinvar::v1::minimal::Record as ClinvarRecord;
use annonars::pbs::annonars::clinvar::v1::sv::Record as ClinvarSvRecord;
use annonars::pbs::annonars::functional::v1::refseq::Record as FunctionalRecord;
use hgvs::data::interface::Provider;
use itertools::Itertools as _;
Expand Down Expand Up @@ -44,7 +46,9 @@ pub struct Evaluator {

/// The ClinVar minimal database.
clinvar_db: Arc<rocksdb::DBWithThreadMode<rocksdb::MultiThreaded>>,
/// The functional elements database.
/// The ClinVar SV database wrapped/indexed by `IntervalTrees`.
clinvar_sv: ClinvarSvIntervalTrees,
/// The functional elements database wrapped/indexed by `IntervalTrees`.
functional: FunctionalIntervalTrees,
}

Expand All @@ -69,7 +73,7 @@ impl Evaluator {
///
/// If anything goes wrong, it returns a generic `anyhow::Error`.
#[allow(clippy::too_many_arguments)]
pub fn new<P1, P2, P3, P4, P5, P6, P7, P8>(
pub fn new<P1, P2, P3, P4, P5, P6, P7, P8, P9>(
assembly: biocommons_bioutils::assemblies::Assembly,
path_tx_db: P1,
path_hgnc: P2,
Expand All @@ -79,6 +83,7 @@ impl Evaluator {
path_gnomad_constraints: P6,
path_clinvar_minimal: P7,
path_functional: P8,
path_clinvar_sv: P9,
) -> Result<Self, anyhow::Error>
where
P1: AsRef<Path>,
Expand All @@ -89,6 +94,7 @@ impl Evaluator {
P6: AsRef<Path>,
P7: AsRef<Path>,
P8: AsRef<Path>,
P9: AsRef<Path>,
{
let provider = Self::load_provider(assembly, path_tx_db.as_ref())
.map_err(|e| anyhow::anyhow!("failed to load transcript database: {}", e))?;
Expand All @@ -113,6 +119,16 @@ impl Evaluator {
"clinvar_by_accession",
)
.map_err(|e| anyhow::anyhow!("failed to open 'minimal' ClinVar RocksDB: {}", e))?;

let (clinvar_sv_db, clinvar_sv_meta) = annonars::clinvar_sv::cli::query::open_rocksdb(
path_clinvar_sv.as_ref(),
"clinvar_sv",
"meta",
"clinvar_sv_by_rcv",
).map_err(|e| anyhow::anyhow!("failed to open clinvar_sv RocksDB: {}", e))?;
let clinvar_sv = ClinvarSvIntervalTrees::with_db(clinvar_sv_db, "clinvar_sv", clinvar_sv_meta)
.map_err(|e| anyhow::anyhow!("failed to load ClinVar SV data: {}", e))?;

let (functional_db, functional_meta) = annonars::functional::cli::query::open_rocksdb(
path_functional.as_ref(),
"functional",
Expand All @@ -132,6 +148,7 @@ impl Evaluator {
decipher_hi_data,
gnomad_constraint_data,
clinvar_db,
clinvar_sv,
functional,
})
}
Expand Down Expand Up @@ -418,6 +435,27 @@ impl Evaluator {
.query(&spdi::Range::new(chrom.replace("chr", ""), start, stop))
.map_err(|e| anyhow::anyhow!("failed to query functional elements: {}", e))
}

/// Query ClinVar SV records by overlap.
pub fn clinvar_sv_overlaps(
&self,
chrom: &str,
start: u32,
stop: u32,
) -> Result<Vec<ClinvarSvRecord>, anyhow::Error> {
tracing::trace!(
"starting ClinVar SV query {}:{}-{}",
&chrom,
start,
stop
);
let start = start as i32;
let stop = stop as i32;

self.clinvar_sv
.query(&spdi::Range::new(chrom.replace("chr", ""), start, stop))
.map_err(|e| anyhow::anyhow!("failed to query ClinVar SV db: {}", e))
}
}

#[cfg(test)]
Expand All @@ -438,6 +476,7 @@ pub mod test {
"tests/data/strucvars/gnomad_constraints.tsv",
"tests/data/strucvars/hi_ts/clinvar/rocksdb",
"tests/data/strucvars/hi_ts/functional/rocksdb",
"tests/data/strucvars/hi_ts/clinvar-sv/rocksdb",
)
.expect("could not initialize global evaluator")
}
Expand Down Expand Up @@ -496,6 +535,26 @@ pub mod test {
Ok(())
}

/// Test internal working of `clinvar_sv_overlaps`.
#[tracing_test::traced_test]
#[rstest::rstest]
#[case("1", 8_018_845, 8_044_954, "VCV000007063")]
#[case("21", 1, 1, "almost-empty")]
fn clinvar_sv_overlaps(
#[case] chrom: &str,
#[case] start: u32,
#[case] stop: u32,
#[case] label: &str,
global_evaluator_37: &Evaluator,
) -> Result<(), anyhow::Error> {
mehari::common::set_snapshot_suffix!("{}", label);

let result = global_evaluator_37.clinvar_sv_overlaps(chrom, start, stop)?;
insta::assert_yaml_snapshot!(result);

Ok(())
}

/// Test `is_protein_coding`.
#[tracing_test::traced_test]
#[rstest::rstest]
Expand Down
14 changes: 14 additions & 0 deletions tests/data/strucvars/hi_ts/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,20 @@ annonars db-utils copy \
--path-out tests/data/strucvars/hi_ts/clinvar-empty/rocksdb \
--path-beds /dev/null

# -- clinvar-sv --

wget -O $TMPDIR/clinvar-data-extract-vars-20231112+0.12.0.tar.gz \
https://github.com/bihealth/clinvar-data-jsonl/releases/download/clinvar-weekly-20231112/clinvar-data-extract-vars-20231112+0.12.0.tar.gz
tar -C $TMPDIR -xvf $TMPDIR/clinvar-data-extract-vars-20231112+0.12.0.tar.gz

mkdir -p tests/data/strucvars/hi_ts/clinvar-sv
rm -rf tests/data/strucvars/hi_ts/clinvar-sv/rocksdb

annonars clinvar-sv import \
--genome-release grch37 \
--path-out-rocksdb tests/data/strucvars/hi_ts/clinvar-sv/rocksdb \
--path-in-jsonl $TMPDIR/clinvar-data-extract-vars-*/clinvar-variants-grch37-strucvars.jsonl.gz

# -- functional elements --

wget \
Expand Down

0 comments on commit 2ee2e0a

Please sign in to comment.