Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for functional elements (#14) #18

Merged
merged 10 commits into from
Nov 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ name = "scarus"
path = "src/main.rs"

[dependencies]
annonars = "0.24.5"
annonars = "0.25.0"
anyhow = "1.0"
bio = "1.4"
biocommons-bioutils = "0.1.4"
Expand Down
16 changes: 16 additions & 0 deletions src/strucvars/eval/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,19 @@
Self { gene, tx_ids }
}
}

/// Functional element.
#[derive(Debug, Clone, PartialEq, serde::Deserialize, serde::Serialize)]
pub enum FunctionalElement {
/// Overlap with functional element with RefSeq.
RefSeq(annonars::pbs::annonars::functional::v1::refseq::Record),
}

impl FunctionalElement {
// Return the identifier of the functional element.
pub fn id(&self) -> &str {

Check warning on line 54 in src/strucvars/eval/common.rs

View check run for this annotation

Codecov / codecov/patch

src/strucvars/eval/common.rs#L54

Added line #L54 was not covered by tests
match self {
Self::RefSeq(record) => &record.id,

Check warning on line 56 in src/strucvars/eval/common.rs

View check run for this annotation

Codecov / codecov/patch

src/strucvars/eval/common.rs#L56

Added line #L56 was not covered by tests
}
}
}
4 changes: 2 additions & 2 deletions src/strucvars/eval/del/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ mod test {
#[tracing_test::traced_test]
#[rstest::rstest]
#[case("1", 11_937_319, 11_944_386, -0.6)] // empty region left of MFN2
#[case("1", 12_098_550, 12_103_898, -0.6)] // empty region right of MFN2
#[case("1", 12_098_550, 12_103_898, 0.0)] // contains regulatory region
#[case("1", 12_050_913, 12_054_733, 0.0)] // contains exon 4 of MFN2
fn test_evaluate(
#[case] chrom: &str,
Expand All @@ -85,7 +85,7 @@ mod test {
) -> Result<(), anyhow::Error> {
mehari::common::set_snapshot_suffix!("{}-{}-{}", chrom, start, stop);

let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);
let result = evaluator.evaluate(&StructuralVariant {
chrom: chrom.into(),
start,
Expand Down
4 changes: 3 additions & 1 deletion src/strucvars/eval/del/result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::strucvars::{
data::{clingen_dosage, hgnc::GeneIdInfo},
ds::StructuralVariant,
eval::{
common::{GeneOverlap, ScoreRange, SuggestedScore},
common::{FunctionalElement, GeneOverlap, ScoreRange, SuggestedScore},
result::Pvs1Result,
},
};
Expand Down Expand Up @@ -112,6 +112,8 @@ pub enum L1 {
pub struct L1A {
/// Overlapping transcripts/genes.
pub genes: Vec<GeneOverlap>,
/// Overlapping functional elements.
pub functional_elements: Vec<FunctionalElement>,
}

impl SuggestedScore for L1A {
Expand Down
56 changes: 44 additions & 12 deletions src/strucvars/eval/del/section1.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Implementation of evaluation of copy number loss section 1.

use super::result::{Section, L1, L1A, L1B};
use crate::strucvars::ds::StructuralVariant;
use crate::strucvars::{ds::StructuralVariant, eval::common::FunctionalElement};

/// Evaluation of deletions, loss of copy number.
///
Expand Down Expand Up @@ -38,11 +38,27 @@
.map_err(|e| {
anyhow::anyhow!("issue with overlap computation of {:?}: {}", strucvar, e)
})?;
let functional_elements = self
.parent

Check warning on line 42 in src/strucvars/eval/del/section1.rs

View check run for this annotation

Codecov / codecov/patch

src/strucvars/eval/del/section1.rs#L42

Added line #L42 was not covered by tests
.functional_overlaps(&strucvar.chrom, strucvar.start, strucvar.stop)
.map_err(|e| {
anyhow::anyhow!(
"issue with overlap computation of {:?} with functional elements: {}",
strucvar,
e

Check warning on line 48 in src/strucvars/eval/del/section1.rs

View check run for this annotation

Codecov / codecov/patch

src/strucvars/eval/del/section1.rs#L45-L48

Added lines #L45 - L48 were not covered by tests
)
})?
.into_iter()
.map(FunctionalElement::RefSeq)

Check warning on line 52 in src/strucvars/eval/del/section1.rs

View check run for this annotation

Codecov / codecov/patch

src/strucvars/eval/del/section1.rs#L52

Added line #L52 was not covered by tests
.collect::<Vec<_>>();

if !genes.is_empty() {
Ok(Section::L1(L1::L1A(L1A { genes })))
} else {
if genes.is_empty() && functional_elements.is_empty() {
Ok(Section::L1(L1::L1B(L1B::default())))
} else {
Ok(Section::L1(L1::L1A(L1A {
genes,
functional_elements,
})))
}
}
}
Expand All @@ -56,19 +72,28 @@
use super::Evaluator;

#[rstest::rstest]
#[case("1", 8_412_464, 8_877_699, "gene-RERE")]
#[case("11", 125_423_939, 125_424_204, "no-gene")]
fn evaluate_l1a(
#[case] chrom: &str,
#[case] start: u32,
#[case] stop: u32,
#[case] label: &str,
global_evaluator_37: &super::super::super::Evaluator,
) -> Result<(), anyhow::Error> {
let evaluator = Evaluator::with_parent(&global_evaluator_37);
mehari::common::set_snapshot_suffix!("{}", label);

let evaluator = Evaluator::with_parent(global_evaluator_37);
let strucvar = ds::StructuralVariant {
chrom: "1".to_string(),
start: 8412464,
stop: 8877699,
chrom: chrom.to_string(),
start,
stop,
svtype: ds::SvType::Del,
ambiguous_range: None,
};

let res = evaluator.evaluate(&strucvar)?;
eprintln!("{:?}", &res);

assert!(matches!(res, Section::L1(L1::L1A(_))));
insta::assert_yaml_snapshot!(res);
Expand All @@ -77,14 +102,21 @@
}

#[rstest::rstest]
#[case("2", 1, 1, "empty")]
fn evaluate_l1b(
#[case] chrom: &str,
#[case] start: u32,
#[case] stop: u32,
#[case] label: &str,
global_evaluator_37: &super::super::super::Evaluator,
) -> Result<(), anyhow::Error> {
let evaluator = Evaluator::with_parent(&global_evaluator_37);
mehari::common::set_snapshot_suffix!("{}", label);

let evaluator = Evaluator::with_parent(global_evaluator_37);
let strucvar = ds::StructuralVariant {
chrom: "22".to_string(),
start: 1,
stop: 1,
chrom: chrom.to_string(),
start,
stop,
svtype: ds::SvType::Del,
ambiguous_range: None,
};
Expand Down
20 changes: 10 additions & 10 deletions src/strucvars/eval/del/section2.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Implementation of evaluation of copy number loss section 2.

use annonars::{clinvar_minimal::pbs::ClinicalSignificance, clinvar_minimal::pbs::ReviewStatus};
use annonars::pbs::annonars::clinvar::v1::minimal::{ClinicalSignificance, ReviewStatus};
use bio::bio_types::genome::AbstractInterval;
use hgvs::data::interface::{Provider, TxExonsRecord, TxInfoRecord};

Expand Down Expand Up @@ -148,15 +148,15 @@ impl<'a> Evaluator<'a> {
.collect::<Vec<_>>();
let hi_regions = clingen_regions
.iter()
.cloned()
.filter(|clingen_region| {
.filter(|&clingen_region| {
let clingen_interval: Interval = clingen_region
.clone()
.try_into()
.expect("no interval for region");
contains(sv_interval, &clingen_interval)
&& clingen_region.haploinsufficiency_score == Score::SufficientEvidence
})
.cloned()
.collect::<Vec<_>>();
if !hi_regions.is_empty() || !hi_genes.is_empty() {
Some(vec![Section::L2(L2::L2A(L2A {
Expand Down Expand Up @@ -669,7 +669,7 @@ mod test {
) -> Result<(), anyhow::Error> {
mehari::common::set_snapshot_suffix!("{}-{}", gene, label);

let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);
let strucvar = ds::StructuralVariant {
chrom: chrom.into(),
start: start as u32,
Expand Down Expand Up @@ -764,7 +764,7 @@ mod test {
ambiguous_range: None,
};
let sv_interval = strucvar.into();
let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);
let (genes, _) = global_evaluator_37.clingen_overlaps(&sv_interval);

let res = evaluator.handle_cases_2c_2e(&sv_interval, &genes)?;
Expand Down Expand Up @@ -814,7 +814,7 @@ mod test {
let exons = global_evaluator_37
.provider
.get_tx_exons(tx_id, "NC_000002.11", "splign")?;
let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);

let res = evaluator.handle_case_2c(&tx_info, &exons, &sv_interval);
insta::assert_yaml_snapshot!(res);
Expand Down Expand Up @@ -868,7 +868,7 @@ mod test {
let exons = global_evaluator_37
.provider
.get_tx_exons(tx_id, "NC_000002.11", "splign")?;
let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);

let res = evaluator.handle_case_2d(&tx_info, &exons, &sv_interval)?;

Expand Down Expand Up @@ -913,7 +913,7 @@ mod test {
let tx_info = global_evaluator_37
.provider
.get_tx_info(tx_id, "NC_000002.11", "splign")?;
let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);

let section = evaluator.handle_case_2e(&tx_info, &sv_interval);
insta::assert_yaml_snapshot!(section);
Expand Down Expand Up @@ -990,7 +990,7 @@ mod test {
};
let sv_interval = strucvar.into();

let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);

let res = evaluator.handle_case_2h(&sv_interval)?;
insta::assert_yaml_snapshot!(res);
Expand All @@ -1004,7 +1004,7 @@ mod test {
fn get_gene_hi_predictions(
global_evaluator_37: &super::super::super::Evaluator,
) -> Result<(), anyhow::Error> {
let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);

// MFN2 is not in DECIPHER HI.
let res_mfn2 = evaluator.get_gene_hi_predictions("HGNC:16877");
Expand Down
8 changes: 6 additions & 2 deletions src/strucvars/eval/del/section3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ impl Evaluator {
pub fn evaluate(&self, l1: &Section) -> Result<Section, anyhow::Error> {
tracing::debug!("re-evaluating Section 1 results for Section 3 for {:?}", l1);
let result = match l1 {
Section::L1(L1::L1A(L1A { genes })) => {
Section::L1(L1::L1A(L1A { genes, .. })) => {
if genes.len() < 25 {
Section::L3(L3::L3A(L3Count {
suggested_score: 0.0,
Expand Down Expand Up @@ -93,10 +93,14 @@ mod test {
tx_ids: vec![String::from("fake-tx-id")],
})
.collect();
let functional_elements = Vec::new();

let evaluator = super::Evaluator::new();

let result = evaluator.evaluate(&Section::L1(L1::L1A(L1A { genes })))?;
let result = evaluator.evaluate(&Section::L1(L1::L1A(L1A {
genes,
functional_elements,
})))?;
match result {
Section::L3(L3::L3A(L3Count {
suggested_score,
Expand Down
2 changes: 1 addition & 1 deletion src/strucvars/eval/del/section4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ mod test {
fn test_evaluate(
global_evaluator_37: &super::super::super::Evaluator,
) -> Result<(), anyhow::Error> {
let evaluator = super::Evaluator::with_parent(&global_evaluator_37);
let evaluator = super::Evaluator::with_parent(global_evaluator_37);

// TODO: write test after we have an actual implementation
assert_eq!(
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
---
source: src/strucvars/eval/del/section1.rs
expression: res
---
L1:
L1A:
genes:
- gene:
hgnc_id: "HGNC:9965"
hgnc_symbol: RERE
ensembl_gene_id: ENSG00000142599
ncbi_gene_id: "473"
tx_ids:
- NM_001042681.2
- NM_001042682.2
- NM_012102.4
functional_elements:
- RefSeq:
chromosome: "1"
start: 8877649
stop: 8877943
id: "id-GeneID:112590823"
dbxref: "GeneID:112590823"
category: 6
regulatory_class: 2
note: ~
experiment: "EXISTENCE:reporter gene assay evidence [ECO:0000049][PMID:27701403]"
function: activates a minimal TATA promoter and a strong SV40 promoter by Sharpr-MPRA in HepG2 and K562 cells
- RefSeq:
chromosome: "1"
start: 8877649
stop: 8877943
id: "id-GeneID:112590823-2"
dbxref: "GeneID:112590823"
category: 5
regulatory_class: ~
note: ~
experiment: ~
function: ~

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---
source: src/strucvars/eval/del/section1.rs
expression: res
---
L1:
L1A:
genes: []
functional_elements:
- RefSeq:
chromosome: "11"
start: 125423938
stop: 125424204
id: "id-GeneID:116225306"
dbxref: "GeneID:116225306"
category: 5
regulatory_class: ~
note: ~
experiment: ~
function: "regulatory_interactions: FEZ1"
- RefSeq:
chromosome: "11"
start: 125423938
stop: 125424204
id: "id-GeneID:116225306-2"
dbxref: "GeneID:116225306"
category: 6
regulatory_class: 16
note: ~
experiment: "EXISTENCE:mutant phenotype evidence [ECO:0000015][PMID:30612741]"
function: positively regulates FEZ1 gene expression with high confidence in K562 cells

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ expression: result
tx_ids:
- NM_001127660.2
- NM_014874.4
functional_elements: []
- L3:
L3A:
suggested_score: 0
Expand Down
Loading
Loading