Skip to content

Commit

Permalink
sam/record/data/field/value/base_modifications/parser/group: Decode p…
Browse files Browse the repository at this point in the history
…ositions based on strand orientation
  • Loading branch information
zaeleus committed Jul 14, 2023
1 parent 6efeb33 commit ec4903a
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,19 @@ pub enum UnmodifiedBase {
N,
}

impl UnmodifiedBase {
pub fn complement(&self) -> Self {
match self {
Self::A => Self::T,
Self::C => Self::G,
Self::G => Self::C,
Self::T => Self::A,
Self::U => Self::A,
Self::N => Self::N,
}
}
}

/// An error returned when a base modifications group unmodified base fails to parse.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseError {
Expand Down Expand Up @@ -67,6 +80,16 @@ impl From<UnmodifiedBase> for crate::record::sequence::Base {
mod tests {
use super::*;

#[test]
fn test_complement() {
assert_eq!(UnmodifiedBase::A.complement(), UnmodifiedBase::T);
assert_eq!(UnmodifiedBase::C.complement(), UnmodifiedBase::G);
assert_eq!(UnmodifiedBase::G.complement(), UnmodifiedBase::C);
assert_eq!(UnmodifiedBase::T.complement(), UnmodifiedBase::A);
assert_eq!(UnmodifiedBase::U.complement(), UnmodifiedBase::A);
assert_eq!(UnmodifiedBase::N.complement(), UnmodifiedBase::N);
}

#[test]
fn test_try_from_u8_for_unmodified_base() {
fn t(b: u8, expected: UnmodifiedBase) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,18 @@ impl fmt::Display for ParseError {
}
}

pub(super) fn parse(s: &str, sequence: &Sequence) -> Result<BaseModifications, ParseError> {
pub(super) fn parse(
s: &str,
is_reverse_complemented: bool,
sequence: &Sequence,
) -> Result<BaseModifications, ParseError> {
let mut groups = Vec::new();
let mut src = s.as_bytes();

while !src.is_empty() {
let group = parse_group(&mut src, sequence).map_err(ParseError::InvalidGroup)?;
let group = parse_group(&mut src, is_reverse_complemented, sequence)
.map_err(ParseError::InvalidGroup)?;

groups.push(group);
}

Expand All @@ -51,8 +57,9 @@ mod tests {
Group,
};

let is_reverse_complemented = false;
let sequence = "CACCCGATGACCGGCT".parse()?;
let actual = parse("C+m,1,3,0;G-o,2;", &sequence);
let actual = parse("C+m,1,3,0;G-o,2;", is_reverse_complemented, &sequence);

let expected = BaseModifications(vec![
Group::new(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,24 @@ impl fmt::Display for ParseError {
}
}

pub(super) fn parse_group(src: &mut &[u8], sequence: &Sequence) -> Result<Group, ParseError> {
pub(super) fn parse_group(
src: &mut &[u8],
is_reverse_complemented: bool,
sequence: &Sequence,
) -> Result<Group, ParseError> {
let unmodified_base = parse_unmodified_base(src).map_err(ParseError::InvalidUnmodifiedBase)?;
let strand = parse_strand(src).map_err(ParseError::InvalidStrand)?;
let modifications = parse_modifications(src).map_err(ParseError::InvalidModifications)?;
let status = parse_status(src);
let skip_counts = parse_skip_counts(src)?;
consume_terminator(src)?;

let positions = decode_positions(&skip_counts, sequence, unmodified_base)?;
let positions = decode_positions(
&skip_counts,
is_reverse_complemented,
sequence,
unmodified_base,
)?;

Ok(Group::new(
unmodified_base,
Expand Down Expand Up @@ -120,20 +129,38 @@ fn consume_terminator(src: &mut &[u8]) -> Result<(), ParseError> {

fn decode_positions(
skip_counts: &[usize],
is_reverse_complemented: bool,
sequence: &Sequence,
unmodified_base: UnmodifiedBase,
) -> Result<Vec<usize>, ParseError> {
use crate::record::sequence::Base;

let mut positions = Vec::with_capacity(skip_counts.len());
let unmodified_base = Base::from(unmodified_base);

let mut iter = sequence
.as_ref()
.iter()
.enumerate()
.filter(|(_, &base)| base == unmodified_base)
.map(|(i, _)| i);
let mut iter: Box<dyn Iterator<Item = usize>> = if is_reverse_complemented {
let unmodified_base = Base::from(unmodified_base.complement());

Box::new(
sequence
.as_ref()
.iter()
.enumerate()
.rev()
.filter(move |(_, &base)| base == unmodified_base)
.map(|(i, _)| i),
)
} else {
let unmodified_base = Base::from(unmodified_base);

Box::new(
sequence
.as_ref()
.iter()
.enumerate()
.filter(move |(_, &base)| base == unmodified_base)
.map(|(i, _)| i),
)
};

for &count in skip_counts {
let i = iter.nth(count).unwrap();
Expand All @@ -153,10 +180,11 @@ mod tests {
modification, Status, Strand, UnmodifiedBase,
};

let is_reverse_complemented = false;
let sequence = "CACCCGATGACCGGCT".parse()?;

let mut src = &b"C+m,1,3,0;"[..];
let actual = parse_group(&mut src, &sequence);
let actual = parse_group(&mut src, is_reverse_complemented, &sequence);
let expected = Group::new(
UnmodifiedBase::C,
Strand::Forward,
Expand All @@ -167,7 +195,7 @@ mod tests {
assert_eq!(actual, Ok(expected));

let mut src = &b"C+m.,1,3,0;"[..];
let actual = parse_group(&mut src, &sequence);
let actual = parse_group(&mut src, is_reverse_complemented, &sequence);
let expected = Group::new(
UnmodifiedBase::C,
Strand::Forward,
Expand All @@ -177,27 +205,38 @@ mod tests {
);
assert_eq!(actual, Ok(expected));

let mut src = &b"C+m,1,0,0;"[..];
let actual = parse_group(&mut src, true, &sequence);
let expected = Group::new(
UnmodifiedBase::C,
Strand::Forward,
vec![modification::FIVE_METHYLCYTOSINE],
None,
vec![12, 8, 5],
);
assert_eq!(actual, Ok(expected));

let mut src = &b""[..];
assert!(matches!(
parse_group(&mut src, &sequence),
parse_group(&mut src, is_reverse_complemented, &sequence),
Err(ParseError::InvalidUnmodifiedBase(_))
));

let mut src = &b"C"[..];
assert!(matches!(
parse_group(&mut src, &sequence),
parse_group(&mut src, is_reverse_complemented, &sequence),
Err(ParseError::InvalidStrand(_))
));

let mut src = &b"C+"[..];
assert!(matches!(
parse_group(&mut src, &sequence),
parse_group(&mut src, is_reverse_complemented, &sequence),
Err(ParseError::InvalidModifications(_))
));

let mut src = &b"C+m,"[..];
assert!(matches!(
parse_group(&mut src, &sequence),
parse_group(&mut src, is_reverse_complemented, &sequence),
Err(ParseError::InvalidSkipCount(_))
));

Expand Down

0 comments on commit ec4903a

Please sign in to comment.