Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Plugin system (#426) #433

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,815 changes: 1,739 additions & 76 deletions Cargo.lock

Large diffs are not rendered by default.

26 changes: 22 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,29 @@
[workspace]
members = [
"mehari-plugins",
"mehari-plugin-vep-nmd",
]
resolver = "2"

[workspace.package]
homepage = "https://github.com/varfish-org/mehari"
repository = "https://github.com/varfish-org/mehari"
license = "MIT"
edition = "2021"
readme = "README.md"

[package]
name = "mehari"
version = "0.25.5"
edition = "2021"
authors = ["Manuel Holtgrewe <[email protected]>"]
description = "Variant effect prediction all in Rust"
license = "MIT"
homepage = "https://github.com/varfish-org/mehari"
readme = "README.md"
rust-version = "1.64.0"
edition.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true
readme.workspace = true

[features]
default = []
Expand Down Expand Up @@ -68,7 +84,7 @@ rustc-hash = "1.1"
seqrepo = "0.10"
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_with = { version = "3.6", features=["alloc", "macros", "indexmap_2"], default-features = false }
serde_with = { version = "3.6", features = ["alloc", "macros", "indexmap_2"], default-features = false }
serde_yaml = "0.9"
strum = { version = "0.26", features = ["derive"] }
tempfile = "3"
Expand All @@ -80,6 +96,8 @@ uuid = { version = "1.7", features = ["fast-rng", "serde"] }
zstd = "0.13"
pbjson = "0.6"
pbjson-types = "0.6"
extism = "1.1.0"
mehari-plugins = { path = "mehari-plugins" }

[build-dependencies]
anyhow = "1.0"
Expand Down
2 changes: 2 additions & 0 deletions mehari-plugin-vep-nmd/.cargo/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[build]
target = "wasm32-unknown-unknown"
28 changes: 28 additions & 0 deletions mehari-plugin-vep-nmd/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[package]
name = "mehari-plugin-vep-nmd"
version = "0.1.0"
authors = ["Till Hartmann <[email protected]>"]
description = "NMD plugin for mehari, based on VEP's NMD plugin"
edition.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true
readme.workspace = true

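# Cargo's per-package target support is still unstable, see
# https://doc.rust-lang.org/nightly/cargo/reference/unstable.html#per-package-target
# Once it is stable, use it (see the commented-out key below) instead of .cargo/config.toml.
# Until then, note that Cargo ignores this crate's .cargo/config.toml when it is invoked
# from the parent workspace: running `cargo build` from the mehari-plugin-vep-nmd directory
# (instead of from the parent mehari directory) does pick up the wasm32 target.
# forced-target = "wasm32-unknown-unknown"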

[lib]
crate_type = ["cdylib"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
extism-pdk = "1.1.0"
extism-convert = "1.2.0"
serde = { version = "1.0.197", features = ["derive"] }
serde_json = "1.0.115"
mehari-plugins = { path = "../mehari-plugins" }
99 changes: 99 additions & 0 deletions mehari-plugin-vep-nmd/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
use extism_convert::Json;
use extism_pdk::*;

use mehari_plugins::*;

/// Plugin entry point: annotates a single transcript variation allele.
///
/// The host passes a JSON-encoded [`TranscriptVariationAllele`]. The result is
/// `Some` annotation carrying the `"NMD"` label when `transcript_escapes_nmd`
/// returns `true` for the input, and `None` otherwise.
#[plugin_fn]
pub fn process(Json(tva): Json<TranscriptVariationAllele>) -> FnResult<Option<Json<Annotation>>> {
    let annotation = if transcript_escapes_nmd(&tva) {
        Some(Json(Annotation::new("NMD".into())))
    } else {
        None
    };
    Ok(annotation)
}

/// Reports the kind of feature this plugin operates on (the string `"Transcript"`).
#[plugin_fn]
pub fn feature_type() -> FnResult<String> {
    let feature = String::from("Transcript");
    Ok(feature)
}

/// Returns the header metadata for this plugin: the tag name and its
/// human-readable description.
#[plugin_fn]
pub fn header_info() -> FnResult<Json<HeaderInfo>> {
    let info = HeaderInfo {
        tag: "NMD".into(),
        description: "Nonsense-mediated mRNA decay escaping variants prediction".into(),
    };
    Ok(Json(info))
}

/// Core decision logic of the NMD plugin.
///
/// Returns `false` right away unless at least one overlap consequence of `tva`
/// has an SO term listed in `INCLUDE_SO`. Otherwise returns `true` if any of
/// the following rules holds (checked in this order, short-circuiting):
///
/// 1. the variant end position lies within the last exon,
/// 2. the variant passes the penultimate-exon check,
/// 3. the variant lies within the first coding bases of the transcript,
/// 4. the transcript has no introns.
fn transcript_escapes_nmd(tva: &TranscriptVariationAllele) -> bool {
    // Only "stop_gained", "frameshift_variant", "splice_donor_variant" and
    // "splice_acceptor_variant" consequences are considered at all.
    let has_relevant_consequence = tva
        .overlap_consequences()
        .iter()
        .any(|consequence| INCLUDE_SO.contains(&consequence.so_term().as_ref()));
    if !has_relevant_consequence {
        return false;
    }

    let transcript = tva.transcript();
    let exons = transcript.exons();
    let strand = transcript.strand();
    let vf_end = tva.variation_feature().seq_region().end();

    // Each rule on its own is sufficient; evaluate them one by one.
    if variant_within_last_exon(vf_end, exons) {
        return true;
    }
    if variant_upstream_of_penultimate_exon(vf_end, strand, exons) {
        return true;
    }
    if variant_within_first_100_coding_bases(tva.transcript_variation()) {
        return true;
    }
    variant_is_intronless(transcript)
}

/// Sequence Ontology (SO) consequence terms that make a variant eligible for
/// the NMD rules; variants without any of these consequences are never annotated.
const INCLUDE_SO: [&str; 4] = [
    "stop_gained",
    "frameshift_variant",
    "splice_donor_variant",
    "splice_acceptor_variant",
];

/// Tells whether the variant's end position lies inside the last exon in
/// `exons` (both exon boundaries inclusive).
///
/// Returns `false` when `exons` is empty.
fn variant_within_last_exon(variant_feature_end_position: u64, exons: &[Exon]) -> bool {
    match exons.last() {
        Some(last_exon) => {
            (last_exon.start()..=last_exon.end()).contains(&variant_feature_end_position)
        }
        None => false,
    }
}

/// Checks the variant's end position against a window derived from the
/// penultimate (second-to-last) exon in `exons`.
///
/// * `variant_feature_end_position` - end coordinate of the variant.
/// * `strand` - transcript strand; `-1` selects the reverse-strand branch, any
///   other value the forward-strand branch.
/// * `exons` - exons of the transcript, in the order provided by the caller.
///
/// For the forward strand the window is `[exon.end - 51, exon.end]`; for the
/// reverse strand it is `[exon.start + 51, exon.end]`. Returns `false` when
/// there are fewer than two exons (no penultimate exon exists).
fn variant_upstream_of_penultimate_exon(
    variant_feature_end_position: u64,
    strand: isize,
    exons: &[Exon],
) -> bool {
    let vf_end = variant_feature_end_position;
    // Walk from the back instead of computing `exons.len() - 2`: the subtraction
    // underflows `usize` for empty or single-exon transcripts, which panics when
    // overflow checks are enabled.
    exons
        .iter()
        .rev()
        .nth(1)
        .map(|second_last_exon| {
            // NOTE(review): the reverse-strand window [start + 51, end] is not the
            // mirror image of the forward-strand window [end - 51, end] — confirm
            // against the reference VEP NMD plugin before relying on it.
            let coding_region_end = if strand == -1 {
                second_last_exon.start().saturating_add(51)
            } else {
                second_last_exon.end().saturating_sub(51)
            };
            vf_end >= coding_region_end && vf_end <= second_last_exon.end()
        })
        .unwrap_or(false)
}

/// Tells whether the end of the variant's CDS interval is at most 101.
///
/// NOTE(review): presumably the CDS coordinates are 1-based, which would make
/// `<= 101` correspond to "the first 100 coding bases" — confirm against the
/// coordinate convention of `TranscriptVariation::cds`.
fn variant_within_first_100_coding_bases(tv: &TranscriptVariation) -> bool {
    let cds_end = tv.cds().end();
    cds_end <= 101
}

/// Tells whether `transcript` reports having no introns.
fn variant_is_intronless(transcript: &Transcript) -> bool {
    let has_introns = transcript.has_introns();
    !has_introns
}

// Unit tests for the NMD plugin.
// TODO: only a placeholder exists so far; it panics unconditionally, so running
// this crate's tests fails until a real test replaces it.
#[cfg(test)]
mod tests {

// Placeholder for the NMD rule tests; always fails via `panic!()`.
#[test]
fn test_has_nmd() {
panic!()
}
}
24 changes: 24 additions & 0 deletions mehari-plugins/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
[package]
name = "mehari-plugins"
version = "0.1.0"
authors = ["Till Hartmann <[email protected]>"]
edition.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true
readme.workspace = true

[dependencies]
serde = { version = "1.0.197", features = ["derive"] }
anyhow = "1.0.81"
getset = "0.1.2"
derive_builder = "0.20.0"
derive-new = "0.6.0"

[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
extism = "1.2.0"

[[bench]]
name = "plugin_calling"
harness = false
151 changes: 151 additions & 0 deletions mehari-plugins/benches/plugin_calling.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};

use anyhow::Result;
use extism::convert::Json;
use extism::{FromBytesOwned, Manifest, Plugin, ToBytes, Wasm};
use mehari_plugins::*;
use std::path::PathBuf;

/// Invokes the WASM plugin's `process` export for `tva` and reports whether it
/// answered with an annotation equal to `"NMD"`.
///
/// `tva` is cloned because the JSON wrapper passed to the plugin takes the value
/// by ownership. Errors from the plugin call are propagated.
fn call_nmd_plugin(tva: &TranscriptVariationAllele, plugin: &mut Plugin) -> Result<bool> {
    let response = plugin.call::<Json<TranscriptVariationAllele>, Option<Json<Annotation>>>(
        "process",
        Json(tva.clone()),
    )?;
    // No annotation at all counts as "not NMD".
    let is_nmd = response
        .map(|Json(result)| result.annotation == "NMD")
        .unwrap_or(false);
    Ok(is_nmd)
}

/// Runs the native NMD check and additionally performs one JSON encode and one
/// JSON decode of `tva`, so the benchmark can account for the conversion work a
/// plugin call implies.
///
/// Panics if the JSON round trip fails.
fn call_nmd_native_with_conversion(tva: &TranscriptVariationAllele) -> Result<bool> {
    // Encode first, as a plugin call would before handing data over.
    let encoded = Json(tva).to_bytes().unwrap();
    let outcome = call_nmd_native(tva);
    // Decode again; `black_box` keeps the optimizer from discarding the work.
    let decoded = Json::<TranscriptVariationAllele>::from_bytes_owned(&encoded).unwrap();
    let Json(_roundtripped) = black_box(decoded);
    outcome
}

/// Native (non-WASM) copy of the NMD plugin logic, used as the benchmark baseline.
///
/// The nested helpers duplicate the rules so the benchmark does not depend on
/// the plugin crate. Always returns `Ok`; the `Result` wrapper keeps the
/// signature parallel to `call_nmd_plugin`.
fn call_nmd_native(tva: &TranscriptVariationAllele) -> Result<bool> {
    /// Returns `true` when `tva` has an eligible consequence and any rule matches.
    fn transcript_escapes_nmd(tva: &TranscriptVariationAllele) -> bool {
        // To qualify for NMD, at least one of the following consequences is required:
        // "stop_gained", "frameshift_variant", "splice_donor_variant", "splice_acceptor_variant",
        if !tva
            .overlap_consequences()
            .iter()
            .any(|oc| INCLUDE_SO.contains(&oc.so_term().as_ref()))
        {
            return false;
        }

        let transcript = tva.transcript();
        let exons = transcript.exons();
        let strand = transcript.strand();
        let variant_feature_end_position = tva.variation_feature().seq_region().end();

        // Rules for NMD prediction; each one on its own is sufficient.
        variant_within_last_exon(variant_feature_end_position, exons)
            || variant_upstream_of_penultimate_exon(variant_feature_end_position, strand, exons)
            || variant_within_first_100_coding_bases(tva.transcript_variation())
            || variant_is_intronless(transcript)
    }

    /// SO consequence terms that make a variant eligible for the rules.
    const INCLUDE_SO: [&str; 4] = [
        "stop_gained",
        "frameshift_variant",
        "splice_donor_variant",
        "splice_acceptor_variant",
    ];

    /// Checks whether the variant location falls within the last exon of the transcript
    fn variant_within_last_exon(variant_feature_end_position: u64, exons: &[Exon]) -> bool {
        let vf_end = variant_feature_end_position;
        exons
            .last()
            .map(|exon| vf_end >= exon.start() && vf_end <= exon.end())
            .unwrap_or(false)
    }

    /// Checks the variant end against a window derived from the penultimate exon;
    /// `false` when fewer than two exons exist.
    fn variant_upstream_of_penultimate_exon(
        variant_feature_end_position: u64,
        strand: isize,
        exons: &[Exon],
    ) -> bool {
        let vf_end = variant_feature_end_position;
        // Walk from the back instead of computing `exons.len() - 2`, which
        // underflows `usize` (and panics with overflow checks) for < 2 exons.
        exons
            .iter()
            .rev()
            .nth(1)
            .map(|second_last_exon| {
                let coding_region_end = if strand == -1 {
                    second_last_exon.start().saturating_add(51)
                } else {
                    second_last_exon.end().saturating_sub(51)
                };
                vf_end >= coding_region_end && vf_end <= second_last_exon.end()
            })
            .unwrap_or(false)
    }

    /// `true` when the end of the variant's CDS interval is at most 101.
    fn variant_within_first_100_coding_bases(tv: &TranscriptVariation) -> bool {
        tv.cds().end() <= 101
    }

    /// `true` when the transcript reports having no introns.
    fn variant_is_intronless(transcript: &Transcript) -> bool {
        !transcript.has_introns()
    }

    Ok(transcript_escapes_nmd(tva))
}

/// Benchmarks the NMD check executed three ways: through the WASM plugin,
/// natively, and natively with an added JSON round trip.
///
/// Expects the plugin to have been built for `wasm32-unknown-unknown` in release
/// mode beforehand; panics with a descriptive message if it cannot be loaded or
/// if a first call to it fails.
fn criterion_benchmark(c: &mut Criterion) {
    let mut group = c.benchmark_group("wasm-vs-native");
    group.significance_level(0.1).sample_size(1000);

    // setup input data: three exons on the reverse strand with a `stop_gained`
    // consequence
    let tva = TranscriptVariationAlleleBuilder::default()
        .transcript(
            TranscriptBuilder::default()
                .introns(vec![Intron::new(10, 20), Intron::new(30, 40)])
                .exons(vec![
                    Exon::new(0, 10),
                    Exon::new(20, 30),
                    Exon::new(40, 100),
                ])
                .strand(-1)
                .build()
                .unwrap(),
        )
        .transcript_variation(TranscriptVariation::new(CodingSequence::new(0, 100)))
        .variation_feature(VariationFeature::new(SeqRegion::new(15, 25)))
        .overlap_consequences(vec![OverlapConsequence::new("stop_gained".into())])
        .build()
        .unwrap();

    // initialize plugin only once
    let mut plugin_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    plugin_path.push("../target/wasm32-unknown-unknown/release/mehari_plugin_vep_nmd.wasm");
    // Keep a printable copy of the path for the error message below.
    let plugin_location = plugin_path.display().to_string();
    let url = Wasm::file(plugin_path);
    let manifest = Manifest::new([url]);
    let mut plugin = Plugin::new(manifest, [], true).unwrap_or_else(|err| {
        panic!(
            "failed to load the NMD plugin from {plugin_location} \
             (has it been built in release mode for wasm32-unknown-unknown?): {err:?}"
        )
    });

    // `b.iter` silently drops an `Err` result, so make sure once up front that
    // the plugin call actually succeeds; otherwise the error path would be timed.
    call_nmd_plugin(&tva, &mut plugin)
        .expect("calling `process` on the NMD plugin failed; benchmark would be meaningless");

    group.bench_function("call nmd plugin wasm optimized", |b| {
        b.iter(|| call_nmd_plugin(black_box(&tva), black_box(&mut plugin)))
    });

    group.bench_function("call nmd plugin native", |b| {
        // cloning here because `tva` is cloned inside `call_nmd_plugin`
        // (and we do not want to measure the difference clone makes)
        b.iter(|| call_nmd_native(black_box(&(tva.clone()))))
    });

    group.bench_function("call nmd plugin with conversion", |b| {
        // Native logic plus a JSON encode/decode of the input.
        // cloning here because `tva` is cloned inside `call_nmd_plugin`
        // (and we do not want to measure the difference clone makes)
        b.iter(|| call_nmd_native_with_conversion(black_box(&(tva.clone()))))
    });

    group.finish();
}

// Register `criterion_benchmark` in the `benches` group and let Criterion
// provide the entry point of this bench target (declared with `harness = false`).
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
Loading
Loading