diff --git a/noodles-sam/src/record/data/field/value/base_modifications/group/unmodified_base.rs b/noodles-sam/src/record/data/field/value/base_modifications/group/unmodified_base.rs index d0ccc979a..e6973b15b 100644 --- a/noodles-sam/src/record/data/field/value/base_modifications/group/unmodified_base.rs +++ b/noodles-sam/src/record/data/field/value/base_modifications/group/unmodified_base.rs @@ -17,6 +17,19 @@ pub enum UnmodifiedBase { N, } +impl UnmodifiedBase { + pub fn complement(&self) -> Self { + match self { + Self::A => Self::T, + Self::C => Self::G, + Self::G => Self::C, + Self::T => Self::A, + Self::U => Self::A, + Self::N => Self::N, + } + } +} + /// An error returned when a base modifications group unmodified base fails to parse. #[derive(Clone, Debug, Eq, PartialEq)] pub enum ParseError { @@ -67,6 +80,16 @@ impl From for crate::record::sequence::Base { mod tests { use super::*; + #[test] + fn test_complement() { + assert_eq!(UnmodifiedBase::A.complement(), UnmodifiedBase::T); + assert_eq!(UnmodifiedBase::C.complement(), UnmodifiedBase::G); + assert_eq!(UnmodifiedBase::G.complement(), UnmodifiedBase::C); + assert_eq!(UnmodifiedBase::T.complement(), UnmodifiedBase::A); + assert_eq!(UnmodifiedBase::U.complement(), UnmodifiedBase::A); + assert_eq!(UnmodifiedBase::N.complement(), UnmodifiedBase::N); + } + #[test] fn test_try_from_u8_for_unmodified_base() { fn t(b: u8, expected: UnmodifiedBase) { diff --git a/noodles-sam/src/record/data/field/value/base_modifications/parser.rs b/noodles-sam/src/record/data/field/value/base_modifications/parser.rs index d9f66c9dd..a40cc3110 100644 --- a/noodles-sam/src/record/data/field/value/base_modifications/parser.rs +++ b/noodles-sam/src/record/data/field/value/base_modifications/parser.rs @@ -28,12 +28,18 @@ impl fmt::Display for ParseError { } } -pub(super) fn parse(s: &str, sequence: &Sequence) -> Result { +pub(super) fn parse( + s: &str, + is_reverse_complemented: bool, + sequence: &Sequence, +) -> Result { let mut groups = Vec::new(); let mut src = s.as_bytes(); while !src.is_empty() { - let group = parse_group(&mut src, sequence).map_err(ParseError::InvalidGroup)?; + let group = parse_group(&mut src, is_reverse_complemented, sequence) + .map_err(ParseError::InvalidGroup)?; + groups.push(group); } @@ -51,8 +57,9 @@ mod tests { Group, }; + let is_reverse_complemented = false; let sequence = "CACCCGATGACCGGCT".parse()?; - let actual = parse("C+m,1,3,0;G-o,2;", &sequence); + let actual = parse("C+m,1,3,0;G-o,2;", is_reverse_complemented, &sequence); let expected = BaseModifications(vec![ Group::new( diff --git a/noodles-sam/src/record/data/field/value/base_modifications/parser/group.rs b/noodles-sam/src/record/data/field/value/base_modifications/parser/group.rs index b2888dd9d..80f2c4c16 100644 --- a/noodles-sam/src/record/data/field/value/base_modifications/parser/group.rs +++ b/noodles-sam/src/record/data/field/value/base_modifications/parser/group.rs @@ -60,7 +60,11 @@ impl fmt::Display for ParseError { } } -pub(super) fn parse_group(src: &mut &[u8], sequence: &Sequence) -> Result { +pub(super) fn parse_group( + src: &mut &[u8], + is_reverse_complemented: bool, + sequence: &Sequence, +) -> Result { let unmodified_base = parse_unmodified_base(src).map_err(ParseError::InvalidUnmodifiedBase)?; let strand = parse_strand(src).map_err(ParseError::InvalidStrand)?; let modifications = parse_modifications(src).map_err(ParseError::InvalidModifications)?; @@ -68,7 +72,12 @@ pub(super) fn parse_group(src: &mut &[u8], sequence: &Sequence) -> Result Result<(), ParseError> { fn decode_positions( skip_counts: &[usize], + is_reverse_complemented: bool, sequence: &Sequence, unmodified_base: UnmodifiedBase, ) -> Result, ParseError> { use crate::record::sequence::Base; let mut positions = Vec::with_capacity(skip_counts.len()); - let unmodified_base = Base::from(unmodified_base); - let mut iter = sequence - .as_ref() - .iter() - .enumerate() - .filter(|(_, &base)| base == unmodified_base) - .map(|(i, _)| i); + let mut iter: Box> = if is_reverse_complemented { + let unmodified_base = Base::from(unmodified_base.complement()); + + Box::new( + sequence + .as_ref() + .iter() + .enumerate() + .rev() + .filter(move |(_, &base)| base == unmodified_base) + .map(|(i, _)| i), + ) + } else { + let unmodified_base = Base::from(unmodified_base); + + Box::new( + sequence + .as_ref() + .iter() + .enumerate() + .filter(move |(_, &base)| base == unmodified_base) + .map(|(i, _)| i), + ) + }; for &count in skip_counts { let i = iter.nth(count).unwrap(); @@ -153,10 +180,11 @@ mod tests { modification, Status, Strand, UnmodifiedBase, }; + let is_reverse_complemented = false; let sequence = "CACCCGATGACCGGCT".parse()?; let mut src = &b"C+m,1,3,0;"[..]; - let actual = parse_group(&mut src, &sequence); + let actual = parse_group(&mut src, is_reverse_complemented, &sequence); let expected = Group::new( UnmodifiedBase::C, Strand::Forward, @@ -167,7 +195,7 @@ mod tests { assert_eq!(actual, Ok(expected)); let mut src = &b"C+m.,1,3,0;"[..]; - let actual = parse_group(&mut src, &sequence); + let actual = parse_group(&mut src, is_reverse_complemented, &sequence); let expected = Group::new( UnmodifiedBase::C, Strand::Forward, @@ -177,27 +205,38 @@ mod tests { ); assert_eq!(actual, Ok(expected)); + let mut src = &b"C+m,1,0,0;"[..]; + let actual = parse_group(&mut src, true, &sequence); + let expected = Group::new( + UnmodifiedBase::C, + Strand::Forward, + vec![modification::FIVE_METHYLCYTOSINE], + None, + vec![12, 8, 5], + ); + assert_eq!(actual, Ok(expected)); + let mut src = &b""[..]; assert!(matches!( - parse_group(&mut src, &sequence), + parse_group(&mut src, is_reverse_complemented, &sequence), Err(ParseError::InvalidUnmodifiedBase(_)) )); let mut src = &b"C"[..]; assert!(matches!( - parse_group(&mut src, &sequence), + parse_group(&mut src, is_reverse_complemented, &sequence), Err(ParseError::InvalidStrand(_)) )); let mut src = &b"C+"[..]; assert!(matches!( - parse_group(&mut src, &sequence), + parse_group(&mut src, is_reverse_complemented, &sequence), Err(ParseError::InvalidModifications(_)) )); let mut src = &b"C+m,"[..]; assert!(matches!( - parse_group(&mut src, &sequence), + parse_group(&mut src, is_reverse_complemented, &sequence), Err(ParseError::InvalidSkipCount(_)) ));