Skip to content

Commit

Permalink
bcf/record: Read static fields from buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
zaeleus committed Feb 1, 2024
1 parent f51a703 commit dc09abc
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 55 deletions.
12 changes: 5 additions & 7 deletions noodles-bcf/src/async/io/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ use crate::{
/// ```
pub struct Reader<R> {
inner: R,
buf: Vec<u8>,
string_maps: StringMaps,
}

Expand Down Expand Up @@ -154,7 +153,7 @@ where
/// # }
/// ```
pub async fn read_record(&mut self, record: &mut Record) -> io::Result<usize> {
read_record(&mut self.inner, &mut self.buf, record).await
read_record(&mut self.inner, record).await
}

/// Returns an (async) stream over lazy records starting from the current (input) stream
Expand Down Expand Up @@ -187,13 +186,13 @@ where
/// ```
pub fn records(&mut self) -> impl Stream<Item = io::Result<Record>> + '_ {
Box::pin(stream::try_unfold(
(&mut self.inner, Vec::new(), Record::default()),
|(mut reader, mut buf, mut record)| async {
read_record(&mut reader, &mut buf, &mut record)
(&mut self.inner, Record::default()),
|(mut reader, mut record)| async {
read_record(&mut reader, &mut record)
.await
.map(|n| match n {
0 => None,
_ => Some((record.clone(), (reader, buf, record))),
_ => Some((record.clone(), (reader, record))),
})
},
))
Expand Down Expand Up @@ -321,7 +320,6 @@ impl<R> From<R> for Reader<R> {
fn from(inner: R) -> Self {
Self {
inner,
buf: Vec::new(),
string_maps: StringMaps::default(),
}
}
Expand Down
7 changes: 4 additions & 3 deletions noodles-bcf/src/async/io/reader/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,11 @@ fn intersects(
chromosome_id: usize,
region_interval: Interval,
) -> io::Result<bool> {
let id = record.chromosome_id();
let id = record.chromosome_id()?;

let start = Position::try_from(usize::from(record.position()))
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
let start = record.position().map(usize::from).and_then(|n| {
Position::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
})?;

let end = record.end().map(usize::from).and_then(|n| {
Position::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
Expand Down
32 changes: 16 additions & 16 deletions noodles-bcf/src/async/io/reader/record.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
use crate::Record;
use tokio::io::{self, AsyncRead, AsyncReadExt};

pub(super) async fn read_record<R>(
reader: &mut R,
buf: &mut Vec<u8>,
record: &mut Record,
) -> io::Result<usize>
pub(super) async fn read_record<R>(reader: &mut R, record: &mut Record) -> io::Result<usize>
where
R: AsyncRead + Unpin,
{
Expand All @@ -21,14 +17,19 @@ where
usize::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
})?;

buf.resize(l_shared, Default::default());
reader.read_exact(buf).await?;
let site_buf = record.fields_mut().site_buf_mut();
site_buf.resize(l_shared, 0);
reader.read_exact(site_buf).await?;

let buf = site_buf.clone();
let mut buf_reader = &buf[..];
let (n_fmt, n_sample) = read_site(&mut buf_reader, record)?;

let genotypes = record.genotypes.as_mut();
genotypes.resize(l_indiv, Default::default());
reader.read_exact(genotypes).await?;
let samples_buf = record.fields_mut().samples_buf_mut();
samples_buf.resize(l_indiv, 0);
reader.read_exact(samples_buf).await?;

*record.genotypes.as_mut() = samples_buf.clone();
record.genotypes.set_format_count(n_fmt);
record.genotypes.set_sample_count(n_sample);

Expand Down Expand Up @@ -56,14 +57,13 @@ mod tests {
let string_maps: StringMaps = RAW_HEADER.parse()?;

let mut reader = &DATA[..];
let mut buf = Vec::new();
let mut record = Record::default();
read_record(&mut reader, &mut buf, &mut record).await?;
read_record(&mut reader, &mut record).await?;

assert_eq!(record.chromosome_id(), 1);
assert_eq!(record.position(), Position::from(101));
assert_eq!(record.rlen(), 1);
assert_eq!(record.quality_score(), Some(30.1));
assert_eq!(record.chromosome_id()?, 1);
assert_eq!(record.position()?, Position::from(101));
assert_eq!(record.rlen()?, 1);
assert_eq!(record.quality_score()?, Some(30.1));
assert_eq!(record.ids(), &"rs123".parse::<Ids>()?);
assert_eq!(record.reference_bases(), "A");
assert_eq!(
Expand Down
2 changes: 1 addition & 1 deletion noodles-bcf/src/io/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ where
/// # Ok::<(), io::Error>(())
/// ```
pub fn read_record(&mut self, record: &mut Record) -> io::Result<usize> {
read_record(&mut self.inner, &mut self.buf, record)
read_record(&mut self.inner, record)
}

/// Returns an iterator over records starting from the current stream position.
Expand Down
28 changes: 16 additions & 12 deletions noodles-bcf/src/io/reader/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::{
Record,
};

pub fn read_record<R>(reader: &mut R, buf: &mut Vec<u8>, record: &mut Record) -> io::Result<usize>
pub fn read_record<R>(reader: &mut R, record: &mut Record) -> io::Result<usize>
where
R: Read,
{
Expand All @@ -23,14 +23,19 @@ where
usize::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
})?;

buf.resize(l_shared, Default::default());
reader.read_exact(buf)?;
let site_buf = record.fields_mut().site_buf_mut();
site_buf.resize(l_shared, 0);
reader.read_exact(site_buf)?;

let buf = site_buf.clone();
let mut buf_reader = &buf[..];
let (n_fmt, n_sample) = read_site(&mut buf_reader, record)?;

let genotypes = record.genotypes.as_mut();
genotypes.resize(l_indiv, Default::default());
reader.read_exact(genotypes)?;
let samples_buf = record.fields_mut().samples_buf_mut();
samples_buf.resize(l_indiv, 0);
reader.read_exact(samples_buf)?;

*record.genotypes.as_mut() = samples_buf.clone();
record.genotypes.set_format_count(n_fmt);
record.genotypes.set_sample_count(n_sample);

Expand Down Expand Up @@ -158,14 +163,13 @@ pub(crate) mod tests {
let string_maps: StringMaps = RAW_HEADER.parse()?;

let mut reader = &DATA[..];
let mut buf = Vec::new();
let mut record = Record::default();
read_record(&mut reader, &mut buf, &mut record)?;
read_record(&mut reader, &mut record)?;

assert_eq!(record.chromosome_id(), 1);
assert_eq!(record.position(), Position::from(101));
assert_eq!(record.rlen(), 1);
assert_eq!(record.quality_score(), Some(30.1));
assert_eq!(record.chromosome_id()?, 1);
assert_eq!(record.position()?, Position::from(101));
assert_eq!(record.rlen()?, 1);
assert_eq!(record.quality_score()?, Some(30.1));
assert_eq!(record.ids(), &"rs123".parse::<Ids>()?);
assert_eq!(record.reference_bases(), "A");
assert_eq!(
Expand Down
44 changes: 31 additions & 13 deletions noodles-bcf/src/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

pub(crate) mod codec;
mod convert;
mod fields;
mod filters;
mod genotypes;
mod info;
Expand All @@ -11,6 +12,7 @@ use std::io;

use noodles_vcf as vcf;

use self::fields::Fields;
pub(crate) use self::value::Value;
pub use self::{filters::Filters, genotypes::Genotypes, info::Info};

Expand All @@ -20,6 +22,7 @@ pub type ChromosomeId = usize;
/// A BCF record.
#[derive(Clone, Debug, PartialEq)]
pub struct Record {
fields: Fields,
pub(crate) chrom: ChromosomeId,
pub(crate) pos: vcf::record::Position,
pub(crate) rlen: usize,
Expand All @@ -33,6 +36,10 @@ pub struct Record {
}

impl Record {
pub(crate) fn fields_mut(&mut self) -> &mut Fields {
&mut self.fields
}

/// Returns the chromosome ID of the record.
///
/// The chromosome ID represents an index in the contig string map, which associates an ID (by
Expand All @@ -45,10 +52,12 @@ impl Record {
/// ```
/// use noodles_bcf as bcf;
/// let record = bcf::Record::default();
/// assert_eq!(record.chromosome_id(), 0);
/// assert_eq!(record.chromosome_id()?, 0);
/// # Ok::<_, std::io::Error>(())
/// ```
pub fn chromosome_id(&self) -> ChromosomeId {
self.chrom
pub fn chromosome_id(&self) -> io::Result<usize> {
let n = self.fields.reference_sequence_id();
usize::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
}

/// Returns the start position of this record.
Expand All @@ -61,14 +70,21 @@ impl Record {
/// ```
/// use noodles_bcf as bcf;
/// let record = bcf::Record::default();
/// assert_eq!(usize::from(record.position()), 1);
/// assert_eq!(record.position().map(usize::from)?, 1);
/// # Ok::<_, std::io::Error>(())
/// ```
pub fn position(&self) -> vcf::record::Position {
self.pos
pub fn position(&self) -> io::Result<vcf::record::Position> {
let n = self.fields.position();

usize::try_from(n)
.map(|m| m + 1)
.map(vcf::record::Position::from)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
}

pub(crate) fn rlen(&self) -> usize {
self.rlen
pub(crate) fn rlen(&self) -> io::Result<usize> {
let n = self.fields.span();
usize::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
}

/// Returns the end position of this record.
Expand All @@ -87,8 +103,8 @@ impl Record {
pub fn end(&self) -> io::Result<vcf::record::Position> {
use vcf::record::Position;

let start = usize::from(self.position());
let len = self.rlen();
let start = self.position().map(usize::from)?;
let len = self.rlen()?;
let end = start + len - 1;

Ok(Position::from(end))
Expand All @@ -101,10 +117,11 @@ impl Record {
/// ```
/// use noodles_bcf as bcf;
/// let record = bcf::Record::default();
/// assert!(record.quality_score().is_none());
/// assert!(record.quality_score()?.is_none());
/// # Ok::<_, std::io::Error>(())
/// ```
pub fn quality_score(&self) -> Option<f32> {
self.qual
pub fn quality_score(&self) -> io::Result<Option<f32>> {
self.fields.quality_score()
}

/// Returns the IDs.
Expand Down Expand Up @@ -171,6 +188,7 @@ impl Record {
impl Default for Record {
fn default() -> Self {
Self {
fields: Fields::default(),
chrom: 0,
pos: vcf::record::Position::from(1),
rlen: 1,
Expand Down
6 changes: 3 additions & 3 deletions noodles-bcf/src/record/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ impl Record {
) -> io::Result<vcf::Record> {
let chromosome = string_maps
.contigs()
.get_index(self.chromosome_id())
.get_index(self.chromosome_id()?)
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "invalid chrom"))?;

let filters = self
Expand All @@ -54,14 +54,14 @@ impl Record {

let mut builder = vcf::Record::builder()
.set_chromosome(chromosome)
.set_position(self.position())
.set_position(self.position()?)
.set_ids(self.ids().clone())
.set_reference_bases(self.reference_bases())
.set_alternate_bases(self.alternate_bases().clone())
.set_info(info)
.set_genotypes(genotypes);

if let Some(quality_score) = self.quality_score() {
if let Some(quality_score) = self.quality_score()? {
builder = builder.set_quality_score(quality_score);
}

Expand Down
80 changes: 80 additions & 0 deletions noodles-bcf/src/record/fields.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
mod bounds;

use std::io;

use self::bounds::Bounds;

/// Raw byte buffers backing a record, plus the accessors that decode fields from them.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct Fields {
// Bytes that the reference sequence ID, position, span and quality score
// accessors slice into; exposed for filling via `site_buf_mut`.
site_buf: Vec<u8>,
// Bytes exposed for filling via `samples_buf_mut`; no accessor in this file
// decodes from it.
samples_buf: Vec<u8>,
// NOTE(review): not read by any method in this file; presumably reserved for
// locating fields inside `site_buf` -- confirm against the `bounds` module.
bounds: Bounds,
}

impl Fields {
    /// Returns a mutable reference to the site buffer so a reader can fill it in place.
    pub(crate) fn site_buf_mut(&mut self) -> &mut Vec<u8> {
        &mut self.site_buf
    }

    /// Returns a mutable reference to the samples buffer so a reader can fill it in place.
    pub(crate) fn samples_buf_mut(&mut self) -> &mut Vec<u8> {
        &mut self.samples_buf
    }

    /// Decodes the reference sequence ID as a little-endian `i32` from the site buffer.
    ///
    /// # Panics
    ///
    /// Panics if the site buffer does not cover `bounds::REFERENCE_SEQUENCE_ID_RANGE`.
    pub(super) fn reference_sequence_id(&self) -> i32 {
        i32::from_le_bytes(self.site_word(bounds::REFERENCE_SEQUENCE_ID_RANGE))
    }

    /// Decodes the position as a little-endian `i32` from the site buffer.
    ///
    /// N.B. the returned value is 0-based.
    ///
    /// # Panics
    ///
    /// Panics if the site buffer does not cover `bounds::POSITION_RANGE`.
    pub(super) fn position(&self) -> i32 {
        i32::from_le_bytes(self.site_word(bounds::POSITION_RANGE))
    }

    /// Decodes the span as a little-endian `i32` from the site buffer.
    ///
    /// # Panics
    ///
    /// Panics if the site buffer does not cover `bounds::SPAN_RANGE`.
    pub(super) fn span(&self) -> i32 {
        i32::from_le_bytes(self.site_word(bounds::SPAN_RANGE))
    }

    /// Decodes the quality score as a little-endian `f32` from the site buffer.
    ///
    /// Returns `Ok(None)` when the raw value classifies as `Float::Missing`.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidData` error when the raw value is neither a regular value nor the
    /// missing marker.
    ///
    /// # Panics
    ///
    /// Panics if the site buffer does not cover `bounds::QUALITY_SCORE_RANGE`.
    pub(super) fn quality_score(&self) -> io::Result<Option<f32>> {
        use crate::record::codec::value::Float;

        let raw = f32::from_le_bytes(self.site_word(bounds::QUALITY_SCORE_RANGE));

        match Float::from(raw) {
            Float::Value(value) => Ok(Some(value)),
            Float::Missing => Ok(None),
            _ => Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "invalid quality score",
            )),
        }
    }

    // Copies the 4 bytes selected by `index` out of the site buffer.
    //
    // The `unwrap` relies on `index` selecting exactly 4 bytes; every caller passes one of
    // the fixed-width range constants from `bounds`.
    fn site_word<I>(&self, index: I) -> [u8; 4]
    where
        I: std::slice::SliceIndex<[u8], Output = [u8]>,
    {
        let src = &self.site_buf[index];
        src.try_into().unwrap()
    }
}

impl Default for Fields {
    /// Builds fields whose site buffer holds a pre-encoded minimal record and whose samples
    /// buffer is empty.
    fn default() -> Self {
        // Per-field meaning of the bytes, in buffer order.
        let site_buf = Vec::from([
            0x00, 0x00, 0x00, 0x00, // chrom = 0
            0x00, 0x00, 0x00, 0x00, // pos = 0 (0-based)
            0x01, 0x00, 0x00, 0x00, // rlen = 1
            0x01, 0x00, 0x80, 0x7f, // qual = None
            0x00, 0x00, // n_info = 0
            0x01, 0x00, // n_allele = 1
            0x00, 0x00, 0x00, // n_sample = 0
            0x00, // n_fmt = 0
            0x07, // ids = []
            0x17, 0x4e, // ref = N
            0x00, // filters = []
        ]);

        Self {
            site_buf,
            samples_buf: Vec::new(),
            bounds: Bounds,
        }
    }
}
Loading

0 comments on commit dc09abc

Please sign in to comment.