Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Track config options, mimic sequence length clamping as implicitly done in hgvs py #206

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions src/mapper/altseq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -437,17 +437,28 @@ impl AltSeqBuilder {

// Incorporate the variant into the sequence (depending on the type).
let mut is_substitution = false;
let range = if end >= seq.len() && reference.is_some() {
log::warn!(
"Altered sequence range {:?} is incompatible with sequence length {:?}, clamping. Variant description is {}",
start..end,
seq.len(),
&self.var_c
);
start..seq.len()
} else {
start..end
};
match (reference, alternative) {
(Some(reference), Some(alternative)) => {
// delins or SNP
seq.replace_range(start..end, &alternative);
seq.replace_range(range, &alternative);
if reference.len() == 1 && alternative.len() == 1 {
is_substitution = true;
}
}
(Some(_reference), None) => {
// deletion
seq.replace_range(start..end, "");
seq.replace_range(range, "");
}
(None, Some(alternative)) => {
// insertion
Expand Down
9 changes: 8 additions & 1 deletion src/mapper/assembly.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::sync::Arc;

use crate::mapper::error::Error;
use crate::mapper::variant;
use crate::normalizer::Direction;
use crate::parser::HgvsVariant;
use crate::{data::interface::Provider, validator::ValidationLevel};
use biocommons_bioutils::assemblies::Assembly;
Expand Down Expand Up @@ -47,6 +48,8 @@ pub struct Config {
/// Use the genome sequence in case of uncertain g-to-n projections. This
/// can be switched off so genome sequence does not have to be available.
pub genome_seq_available: bool,
pub shuffle_direction: Direction,
pub window_size: usize,
}

impl Default for Config {
Expand All @@ -63,6 +66,8 @@ impl Default for Config {
add_gene_symbol: false,
renormalize_g: true,
genome_seq_available: true,
shuffle_direction: Default::default(),
window_size: 20,
}
}
}
Expand Down Expand Up @@ -111,6 +116,8 @@ impl Mapper {
strict_bounds: config.strict_bounds,
renormalize_g: config.renormalize_g,
genome_seq_available: config.genome_seq_available,
shuffle_direction: config.shuffle_direction,
window_size: config.window_size,
};
let inner = variant::Mapper::new(&inner_config, provider.clone());
let asm_accessions = provider
Expand Down Expand Up @@ -281,7 +288,7 @@ impl Mapper {
/// Normalize variant if requested and ignore errors. This is better than checking whether
/// the variant is intronic because future UTAs will support LRG, which will enable checking
/// intronic variants.
fn maybe_normalize(&self, var: &HgvsVariant) -> Result<HgvsVariant, Error> {
pub fn maybe_normalize(&self, var: &HgvsVariant) -> Result<HgvsVariant, Error> {
if self.config.normalize {
let normalizer = self.inner.normalizer()?;
normalizer.normalize(var).or_else(|_| {
Expand Down
29 changes: 25 additions & 4 deletions src/mapper/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
use cached::SizedCache;
use log::{debug, info};

use super::alignment;
use crate::normalizer::Direction;
use crate::{
data::interface::Provider,
mapper::Error,
Expand All @@ -19,8 +21,6 @@
validator::{ValidationLevel, Validator},
};

use super::alignment;

/// Configuration for Mapper.
///
/// Defaults are taken from `hgvs` Python library.
Expand All @@ -37,6 +37,8 @@
/// Use the genome sequence in case of uncertain g-to-n projections. This
/// can be switched off so genome sequence does not have to be available.
pub genome_seq_available: bool,
pub shuffle_direction: Direction,
pub window_size: usize,
}

impl Default for Config {
Expand All @@ -49,6 +51,8 @@
strict_bounds: true,
renormalize_g: true,
genome_seq_available: true,
shuffle_direction: Default::default(),
window_size: 20,
}
}
}
Expand Down Expand Up @@ -151,6 +155,8 @@
self.validator.clone(),
normalizer::Config {
replace_reference: self.config.replace_reference,
shuffle_direction: self.config.shuffle_direction,
window_size: self.config.window_size,
..Default::default()
},
))
Expand Down Expand Up @@ -255,7 +261,7 @@
(Mu::Certain((*pos_n).clone()), edit_n)
}
} else {
// This is the how the original code handles uncertain positions. We will reach
// This is how the original code handles uncertain positions. We will reach
// here if the position is uncertain and we have the genome sequence.
let pos_g = mapper.n_to_g(pos_n)?;
let edit_n = NaEdit::RefAlt {
Expand Down Expand Up @@ -786,14 +792,29 @@
.loc_range()
.ok_or(Error::NoAlteredSequenceForMissingPositions)?;
let r = ((r.start - interval.start) as usize)..((r.end - interval.start) as usize);
let r = if r.end >= seq.len() {
log::warn!(
"Altered sequence range {:?} is incompatible with sequence length {:?}, clamping. Variant description is {}",
r,
seq.len(),
&var
);
r.start..seq.len()
} else {
r
};

let na_edit = var.na_edit().ok_or(Error::NaEditMissing)?;

match na_edit {
NaEdit::RefAlt { alternative, .. } | NaEdit::NumAlt { alternative, .. } => {
seq.replace_range(r, alternative)
}
NaEdit::DelRef { .. } | NaEdit::DelNum { .. } => seq.replace_range(r, ""),
NaEdit::DelRef { .. } | NaEdit::DelNum { .. } => {
// FIXME: the original Python code simply does `del seq[pos_start:pos_end]`,
// which does not raise an error if `pos_end > len(seq)`. Check whether this is intended.
seq.replace_range(r, "")
}
NaEdit::Ins { alternative } => {
seq.replace_range((r.start + 1)..(r.start + 1), alternative)
}
Expand Down Expand Up @@ -1799,7 +1820,7 @@
pub hgvs_c: String,
#[serde(alias = "HGVSp")]
pub hgvs_p: Option<String>,
pub description: Option<String>,

Check warning on line 1823 in src/mapper/variant.rs

View workflow job for this annotation

GitHub Actions / Testing (full)

fields `description` and `alternatives` are never read
pub alternatives: Option<String>,
}

Expand Down
5 changes: 3 additions & 2 deletions src/normalizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ mod error {
}

/// A direction with respect to a sequence.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)]
pub enum Direction {
#[default]
ThreeToFive,
FiveToThree,
}
Expand All @@ -77,7 +78,7 @@ impl Default for Config {
Self {
alt_aln_method: "splign".to_string(),
cross_boundaries: false,
shuffle_direction: Direction::FiveToThree,
shuffle_direction: Default::default(),
replace_reference: true,
validate: true,
window_size: 20,
Expand Down
2 changes: 2 additions & 0 deletions src/validator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ impl ExtrinsicValidator {
strict_bounds: true,
renormalize_g: false,
genome_seq_available: true,
shuffle_direction: Default::default(),
window_size: 20,
};
Self {
strict,
Expand Down
Loading