Skip to content

Commit

Permalink
gtf/record/attributes: Read string values as text between double quotes
Browse files Browse the repository at this point in the history
This allows the entry delimiter (`;`) to be used in string values.

Fixes #299.
  • Loading branch information
zaeleus committed Sep 4, 2024
1 parent 009eb1b commit e9ee6e9
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 19 deletions.
9 changes: 9 additions & 0 deletions noodles-gtf/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@

[#291]: https://github.com/zaeleus/noodles/issues/291

### Fixed

* gtf/record/attributes: Read string values as text between double quotes
([#299]).

This allows the entry delimiter (`;`) to be used in string values.

[#299]: https://github.com/zaeleus/noodles/issues/299

## 0.30.0 - 2024-07-14

### Changed
Expand Down
16 changes: 9 additions & 7 deletions noodles-gtf/src/record/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ impl fmt::Display for Attributes {
for (i, entry) in self.0.iter().enumerate() {
write!(f, "{entry}")?;

f.write_char(entry::TERMINATOR)?;
f.write_char(entry::DELIMITER)?;

if i < self.0.len() - 1 {
f.write_char(DELIMITER)?;
Expand Down Expand Up @@ -80,17 +80,19 @@ impl fmt::Display for ParseError {
impl FromStr for Attributes {
type Err = ParseError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
fn from_str(mut s: &str) -> Result<Self, Self::Err> {
use self::entry::parse_entry;

if s.is_empty() {
return Err(ParseError::Empty);
}

let s = s.strip_suffix(entry::TERMINATOR).unwrap_or(s);
let mut entries = Vec::new();

let entries = s
.split(entry::TERMINATOR)
.map(|t| t.trim().parse().map_err(ParseError::InvalidEntry))
.collect::<Result<_, _>>()?;
while !s.is_empty() {
let entry = parse_entry(&mut s).map_err(ParseError::InvalidEntry)?;
entries.push(entry);
}

Ok(Self(entries))
}
Expand Down
72 changes: 60 additions & 12 deletions noodles-gtf/src/record/attributes/entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
use std::{error, fmt, str::FromStr};

const SEPARATOR: char = ' ';
pub(super) const TERMINATOR: char = ';';
const DOUBLE_QUOTES: char = '"';
pub(super) const DELIMITER: char = ';';

/// A GTF record attribute entry.
#[derive(Clone, Debug, Eq, PartialEq)]
Expand Down Expand Up @@ -88,27 +89,70 @@ impl fmt::Display for ParseError {
impl FromStr for Entry {
type Err = ParseError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
fn from_str(mut s: &str) -> Result<Self, Self::Err> {
if s.is_empty() {
Err(ParseError::Empty)
} else {
parse_entry(s)
parse_entry(&mut s)
}
}
}

fn parse_entry(s: &str) -> Result<Entry, ParseError> {
match s.split_once(SEPARATOR) {
Some((k, v)) => {
let value = parse_value(v);
Ok(Entry::new(k, value))
}
None => Err(ParseError::Invalid),
/// Parses a single `key value` entry from the front of `s`.
///
/// The key is read first, then the value, and finally any delimiter (with
/// its surrounding whitespace) that follows the value is dropped, so `s` is
/// left positioned at the start of whatever comes after this entry.
///
/// # Errors
///
/// Propagates the error from reading the key or the value.
pub(super) fn parse_entry(s: &mut &str) -> Result<Entry, ParseError> {
    // Tuple fields are evaluated left to right: the key is consumed before
    // the value is read.
    let (key, value) = (parse_key(s)?, parse_value(s)?);
    discard_delimiter(s);
    Ok(Entry::new(key, value))
}

/// Reads the key at the front of `s`.
///
/// The key is the text before the first `SEPARATOR`. On success, `s` is
/// advanced past the separator; on failure, `s` is left untouched.
///
/// # Errors
///
/// Returns `ParseError::Invalid` when `s` contains no separator.
fn parse_key<'a>(s: &mut &'a str) -> Result<&'a str, ParseError> {
    let input: &'a str = *s;

    match input.split_once(SEPARATOR) {
        Some((key, rest)) => {
            *s = rest;
            Ok(key)
        }
        None => Err(ParseError::Invalid),
    }
}

/// Reads the value at the front of `s`.
///
/// A value that starts with `DOUBLE_QUOTES` is read as a quoted string (the
/// opening quote is consumed here, the rest by `parse_string`); anything else
/// is read as a raw value by `parse_raw_value`.
///
/// # Errors
///
/// Propagates the error of whichever reader is used.
fn parse_value<'a>(s: &mut &'a str) -> Result<&'a str, ParseError> {
    let input: &'a str = *s;

    match input.strip_prefix(DOUBLE_QUOTES) {
        Some(after_quote) => {
            *s = after_quote;
            parse_string(s)
        }
        None => parse_raw_value(s),
    }
}

/// Reads the body of a quoted string whose opening quote has already been
/// consumed.
///
/// Returns the text before the next `DOUBLE_QUOTES` and advances `s` past
/// that closing quote. The text is returned verbatim, so it may contain the
/// entry delimiter. On failure, `s` is left untouched.
///
/// # Errors
///
/// Returns `ParseError::Invalid` when there is no closing quote.
fn parse_string<'a>(s: &mut &'a str) -> Result<&'a str, ParseError> {
    let input: &'a str = *s;

    let Some((text, rest)) = input.split_once(DOUBLE_QUOTES) else {
        return Err(ParseError::Invalid);
    };

    *s = rest;

    Ok(text)
}

fn parse_value(s: &str) -> String {
s.trim_matches('"').into()
/// Reads an unquoted value from the front of `s`.
///
/// The value is everything up to, but not including, the next `DELIMITER`,
/// or the whole remaining input when there is no delimiter. `s` is advanced
/// past the returned text; a delimiter, if present, is left in place.
///
/// This currently never fails; the `Result` return type matches the other
/// value readers.
fn parse_raw_value<'a>(s: &mut &'a str) -> Result<&'a str, ParseError> {
    let input: &'a str = *s;

    // Always consume what is returned. If the remaining input were left in
    // place when no delimiter follows, a caller that loops until the input is
    // empty would re-read the final value as the start of another entry.
    let end = input.find(DELIMITER).unwrap_or(input.len());
    let (value, rest) = input.split_at(end);
    *s = rest;

    Ok(value)
}

/// Skips the separator material between two entries.
///
/// Drops leading whitespace, then at most one `DELIMITER`, then any
/// whitespace after it. Input that does not start with a delimiter only has
/// its leading whitespace removed.
fn discard_delimiter(s: &mut &str) {
    let input = *s;
    let before = input.trim_start();
    let after = before.strip_prefix(DELIMITER).unwrap_or(before);
    *s = after.trim_start();
}

#[cfg(test)]
Expand All @@ -127,6 +171,10 @@ mod tests {
r#"gene_id "g0""#.parse::<Entry>(),
Ok(Entry::new("gene_id", "g0"))
);
assert_eq!(
r#"gene_ids "g0;g1""#.parse::<Entry>(),
Ok(Entry::new("gene_ids", "g0;g1"))
);
assert_eq!(
r#"gene_id """#.parse::<Entry>(),
Ok(Entry::new("gene_id", ""))
Expand Down

0 comments on commit e9ee6e9

Please sign in to comment.