Skip to content

Commit

Permalink
feat: add support for functional elements (#14) (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Nov 19, 2023
1 parent b96499c commit 449d939
Show file tree
Hide file tree
Showing 69 changed files with 1,105 additions and 139 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ name = "scarus"
path = "src/main.rs"

[dependencies]
annonars = "0.24.5"
annonars = "0.25.0"
anyhow = "1.0"
bio = "1.4"
biocommons-bioutils = "0.1.4"
Expand Down
16 changes: 16 additions & 0 deletions src/strucvars/eval/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,19 @@ impl GeneOverlap {
Self { gene, tx_ids }
}
}

/// Functional element.
#[derive(Debug, Clone, PartialEq, serde::Deserialize, serde::Serialize)]
pub enum FunctionalElement {
/// Overlap with functional element with RefSeq.
RefSeq(annonars::pbs::annonars::functional::v1::refseq::Record),
}

impl FunctionalElement {
// Return the identifier of the functional element.
pub fn id(&self) -> &str {
match self {
Self::RefSeq(record) => &record.id,
}
}
}
4 changes: 2 additions & 2 deletions src/strucvars/eval/del/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ mod test {
#[tracing_test::traced_test]
#[rstest::rstest]
#[case("1", 11_937_319, 11_944_386, -0.6)] // empty region left of MFN2
#[case("1", 12_098_550, 12_103_898, -0.6)] // empty region right of MFN2
#[case("1", 12_098_550, 12_103_898, 0.0)] // contains regulatory region
#[case("1", 12_050_913, 12_054_733, 0.0)] // contains exon 4 of MFN2
fn test_evaluate(
#[case] chrom: &str,
Expand All @@ -85,7 +85,7 @@ mod test {
) -> Result<(), anyhow::Error> {
mehari::common::set_snapshot_suffix!("{}-{}-{}", chrom, start, stop);

let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);
let result = evaluator.evaluate(&StructuralVariant {
chrom: chrom.into(),
start,
Expand Down
4 changes: 3 additions & 1 deletion src/strucvars/eval/del/result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::strucvars::{
data::{clingen_dosage, hgnc::GeneIdInfo},
ds::StructuralVariant,
eval::{
common::{GeneOverlap, ScoreRange, SuggestedScore},
common::{FunctionalElement, GeneOverlap, ScoreRange, SuggestedScore},
result::Pvs1Result,
},
};
Expand Down Expand Up @@ -112,6 +112,8 @@ pub enum L1 {
pub struct L1A {
/// Overlapping transcripts/genes.
pub genes: Vec<GeneOverlap>,
/// Overlapping functional elements.
pub functional_elements: Vec<FunctionalElement>,
}

impl SuggestedScore for L1A {
Expand Down
56 changes: 44 additions & 12 deletions src/strucvars/eval/del/section1.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Implementation of evaluation of copy number loss section 1.

use super::result::{Section, L1, L1A, L1B};
use crate::strucvars::ds::StructuralVariant;
use crate::strucvars::{ds::StructuralVariant, eval::common::FunctionalElement};

/// Evaluation of deletions, loss of copy number.
///
Expand Down Expand Up @@ -38,11 +38,27 @@ impl<'a> Evaluator<'a> {
.map_err(|e| {
anyhow::anyhow!("issue with overlap computation of {:?}: {}", strucvar, e)
})?;
let functional_elements = self
.parent
.functional_overlaps(&strucvar.chrom, strucvar.start, strucvar.stop)
.map_err(|e| {
anyhow::anyhow!(
"issue with overlap computation of {:?} with functional elements: {}",
strucvar,
e
)
})?
.into_iter()
.map(FunctionalElement::RefSeq)
.collect::<Vec<_>>();

if !genes.is_empty() {
Ok(Section::L1(L1::L1A(L1A { genes })))
} else {
if genes.is_empty() && functional_elements.is_empty() {
Ok(Section::L1(L1::L1B(L1B::default())))
} else {
Ok(Section::L1(L1::L1A(L1A {
genes,
functional_elements,
})))
}
}
}
Expand All @@ -56,19 +72,28 @@ pub mod test {
use super::Evaluator;

#[rstest::rstest]
#[case("1", 8_412_464, 8_877_699, "gene-RERE")]
#[case("11", 125_423_939, 125_424_204, "no-gene")]
fn evaluate_l1a(
#[case] chrom: &str,
#[case] start: u32,
#[case] stop: u32,
#[case] label: &str,
global_evaluator_37: &super::super::super::Evaluator,
) -> Result<(), anyhow::Error> {
let evaluator = Evaluator::with_parent(&global_evaluator_37);
mehari::common::set_snapshot_suffix!("{}", label);

let evaluator = Evaluator::with_parent(global_evaluator_37);
let strucvar = ds::StructuralVariant {
chrom: "1".to_string(),
start: 8412464,
stop: 8877699,
chrom: chrom.to_string(),
start,
stop,
svtype: ds::SvType::Del,
ambiguous_range: None,
};

let res = evaluator.evaluate(&strucvar)?;
eprintln!("{:?}", &res);

assert!(matches!(res, Section::L1(L1::L1A(_))));
insta::assert_yaml_snapshot!(res);
Expand All @@ -77,14 +102,21 @@ pub mod test {
}

#[rstest::rstest]
#[case("2", 1, 1, "empty")]
fn evaluate_l1b(
#[case] chrom: &str,
#[case] start: u32,
#[case] stop: u32,
#[case] label: &str,
global_evaluator_37: &super::super::super::Evaluator,
) -> Result<(), anyhow::Error> {
let evaluator = Evaluator::with_parent(&global_evaluator_37);
mehari::common::set_snapshot_suffix!("{}", label);

let evaluator = Evaluator::with_parent(global_evaluator_37);
let strucvar = ds::StructuralVariant {
chrom: "22".to_string(),
start: 1,
stop: 1,
chrom: chrom.to_string(),
start,
stop,
svtype: ds::SvType::Del,
ambiguous_range: None,
};
Expand Down
20 changes: 10 additions & 10 deletions src/strucvars/eval/del/section2.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Implementation of evaluation of copy number loss section 2.

use annonars::{clinvar_minimal::pbs::ClinicalSignificance, clinvar_minimal::pbs::ReviewStatus};
use annonars::pbs::annonars::clinvar::v1::minimal::{ClinicalSignificance, ReviewStatus};
use bio::bio_types::genome::AbstractInterval;
use hgvs::data::interface::{Provider, TxExonsRecord, TxInfoRecord};

Expand Down Expand Up @@ -148,15 +148,15 @@ impl<'a> Evaluator<'a> {
.collect::<Vec<_>>();
let hi_regions = clingen_regions
.iter()
.cloned()
.filter(|clingen_region| {
.filter(|&clingen_region| {
let clingen_interval: Interval = clingen_region
.clone()
.try_into()
.expect("no interval for region");
contains(sv_interval, &clingen_interval)
&& clingen_region.haploinsufficiency_score == Score::SufficientEvidence
})
.cloned()
.collect::<Vec<_>>();
if !hi_regions.is_empty() || !hi_genes.is_empty() {
Some(vec![Section::L2(L2::L2A(L2A {
Expand Down Expand Up @@ -669,7 +669,7 @@ mod test {
) -> Result<(), anyhow::Error> {
mehari::common::set_snapshot_suffix!("{}-{}", gene, label);

let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);
let strucvar = ds::StructuralVariant {
chrom: chrom.into(),
start: start as u32,
Expand Down Expand Up @@ -764,7 +764,7 @@ mod test {
ambiguous_range: None,
};
let sv_interval = strucvar.into();
let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);
let (genes, _) = global_evaluator_37.clingen_overlaps(&sv_interval);

let res = evaluator.handle_cases_2c_2e(&sv_interval, &genes)?;
Expand Down Expand Up @@ -814,7 +814,7 @@ mod test {
let exons = global_evaluator_37
.provider
.get_tx_exons(tx_id, "NC_000002.11", "splign")?;
let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);

let res = evaluator.handle_case_2c(&tx_info, &exons, &sv_interval);
insta::assert_yaml_snapshot!(res);
Expand Down Expand Up @@ -868,7 +868,7 @@ mod test {
let exons = global_evaluator_37
.provider
.get_tx_exons(tx_id, "NC_000002.11", "splign")?;
let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);

let res = evaluator.handle_case_2d(&tx_info, &exons, &sv_interval)?;

Expand Down Expand Up @@ -913,7 +913,7 @@ mod test {
let tx_info = global_evaluator_37
.provider
.get_tx_info(tx_id, "NC_000002.11", "splign")?;
let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);

let section = evaluator.handle_case_2e(&tx_info, &sv_interval);
insta::assert_yaml_snapshot!(section);
Expand Down Expand Up @@ -990,7 +990,7 @@ mod test {
};
let sv_interval = strucvar.into();

let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);

let res = evaluator.handle_case_2h(&sv_interval)?;
insta::assert_yaml_snapshot!(res);
Expand All @@ -1004,7 +1004,7 @@ mod test {
fn get_gene_hi_predictions(
global_evaluator_37: &super::super::super::Evaluator,
) -> Result<(), anyhow::Error> {
let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);

// MFN2 is not in DECIPHER HI.
let res_mfn2 = evaluator.get_gene_hi_predictions("HGNC:16877");
Expand Down
8 changes: 6 additions & 2 deletions src/strucvars/eval/del/section3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ impl Evaluator {
pub fn evaluate(&self, l1: &Section) -> Result<Section, anyhow::Error> {
tracing::debug!("re-evaluating Section 1 results for Section 3 for {:?}", l1);
let result = match l1 {
Section::L1(L1::L1A(L1A { genes })) => {
Section::L1(L1::L1A(L1A { genes, .. })) => {
if genes.len() < 25 {
Section::L3(L3::L3A(L3Count {
suggested_score: 0.0,
Expand Down Expand Up @@ -93,10 +93,14 @@ mod test {
tx_ids: vec![String::from("fake-tx-id")],
})
.collect();
let functional_elements = Vec::new();

let evaluator = super::Evaluator::new();

let result = evaluator.evaluate(&Section::L1(L1::L1A(L1A { genes })))?;
let result = evaluator.evaluate(&Section::L1(L1::L1A(L1A {
genes,
functional_elements,
})))?;
match result {
Section::L3(L3::L3A(L3Count {
suggested_score,
Expand Down
2 changes: 1 addition & 1 deletion src/strucvars/eval/del/section4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ mod test {
fn test_evaluate(
global_evaluator_37: &super::super::super::Evaluator,
) -> Result<(), anyhow::Error> {
let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);

// TODO: write test after we have an actual implementation
assert_eq!(
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
---
source: src/strucvars/eval/del/section1.rs
expression: res
---
L1:
L1A:
genes:
- gene:
hgnc_id: "HGNC:9965"
hgnc_symbol: RERE
ensembl_gene_id: ENSG00000142599
ncbi_gene_id: "473"
tx_ids:
- NM_001042681.2
- NM_001042682.2
- NM_012102.4
functional_elements:
- RefSeq:
chromosome: "1"
start: 8877649
stop: 8877943
id: "id-GeneID:112590823"
dbxref: "GeneID:112590823"
category: 6
regulatory_class: 2
note: ~
experiment: "EXISTENCE:reporter gene assay evidence [ECO:0000049][PMID:27701403]"
function: activates a minimal TATA promoter and a strong SV40 promoter by Sharpr-MPRA in HepG2 and K562 cells
- RefSeq:
chromosome: "1"
start: 8877649
stop: 8877943
id: "id-GeneID:112590823-2"
dbxref: "GeneID:112590823"
category: 5
regulatory_class: ~
note: ~
experiment: ~
function: ~

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---
source: src/strucvars/eval/del/section1.rs
expression: res
---
L1:
L1A:
genes: []
functional_elements:
- RefSeq:
chromosome: "11"
start: 125423938
stop: 125424204
id: "id-GeneID:116225306"
dbxref: "GeneID:116225306"
category: 5
regulatory_class: ~
note: ~
experiment: ~
function: "regulatory_interactions: FEZ1"
- RefSeq:
chromosome: "11"
start: 125423938
stop: 125424204
id: "id-GeneID:116225306-2"
dbxref: "GeneID:116225306"
category: 6
regulatory_class: 16
note: ~
experiment: "EXISTENCE:mutant phenotype evidence [ECO:0000015][PMID:30612741]"
function: positively regulates FEZ1 gene expression with high confidence in K562 cells

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ expression: result
tx_ids:
- NM_001127660.2
- NM_014874.4
functional_elements: []
- L3:
L3A:
suggested_score: 0
Expand Down
Loading

0 comments on commit 449d939

Please sign in to comment.