Skip to content

Commit

Permalink
read: add read_bytes_at_until
Browse files Browse the repository at this point in the history
Use this for dyld cache image paths. The main benefit is avoiding
an artificial limit for mapped data.
  • Loading branch information
philipc committed May 26, 2021
1 parent 3c0504b commit df31a2c
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 11 deletions.
14 changes: 3 additions & 11 deletions src/read/macho/dyld_cache.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use core::slice;

use crate::read::{Error, File, ReadError, ReadRef, Result};
use crate::{macho, Architecture, Bytes, Endian, Endianness};
use crate::{macho, Architecture, Endian, Endianness};

/// A parsed representation of the dyld shared cache.
#[derive(Debug)]
Expand Down Expand Up @@ -192,16 +192,8 @@ impl<E: Endian> macho::DyldCacheHeader<E> {
impl<E: Endian> macho::DyldCacheImageInfo<E> {
/// The file system path of this image.
pub fn path<'data, R: ReadRef<'data>>(&self, endian: E, data: R) -> Result<&'data [u8]> {
// The longest path I've seen is 164 bytes long. In theory paths could be longer than 256.
const MAX_PATH_LEN: u64 = 256;

let path_offset = self.path_file_offset.get(endian).into();
let slice_containing_path = data
.read_bytes_at(path_offset, MAX_PATH_LEN)
.read_error("Couldn't read path")?;
Bytes(slice_containing_path)
.read_string()
.read_error("Couldn't read path string (didn't find nul byte within first 256 bytes)")
data.read_bytes_at_until(self.path_file_offset.get(endian).into(), 0)
.read_error("Couldn't read dyld cache image path")
}

/// Find the file offset of the image by looking up its address in the mappings.
Expand Down
52 changes: 52 additions & 0 deletions src/read/read_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::collections::HashMap;
use std::convert::TryInto;
use std::io::{Read, Seek, SeekFrom};
use std::mem;
use std::vec::Vec;

use crate::read::ReadRef;

Expand All @@ -24,6 +25,7 @@ pub struct ReadCache<R: Read + Seek> {
struct ReadCacheInternal<R: Read + Seek> {
read: R,
bufs: HashMap<(u64, u64), Box<[u8]>>,
strings: HashMap<(u64, u8), Box<[u8]>>,
}

impl<R: Read + Seek> ReadCache<R> {
Expand All @@ -33,6 +35,7 @@ impl<R: Read + Seek> ReadCache<R> {
cache: RefCell::new(ReadCacheInternal {
read,
bufs: HashMap::new(),
strings: HashMap::new(),
}),
}
}
Expand Down Expand Up @@ -86,6 +89,44 @@ impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache<R> {
// This is OK because we never mutate or remove entries.
Ok(unsafe { mem::transmute::<&[u8], &[u8]>(buf) })
}

/// Read bytes from `offset` until `delimiter` is found, returning the bytes
/// before it (the delimiter is not included).
///
/// Results are cached, keyed by `(offset, delimiter)`, so repeated lookups
/// return the same slice without touching the underlying reader again.
///
/// Returns `Err(())` on I/O failure, if EOF is reached before the delimiter,
/// or if no delimiter is found within a fixed search limit.
fn read_bytes_at_until(self, offset: u64, delimiter: u8) -> Result<&'a [u8], ()> {
    let cache = &mut *self.cache.borrow_mut();
    let buf = match cache.strings.entry((offset, delimiter)) {
        Entry::Occupied(entry) => entry.into_mut(),
        Entry::Vacant(entry) => {
            // `offset` is already `u64`; no cast needed for SeekFrom::Start.
            cache.read.seek(SeekFrom::Start(offset)).map_err(|_| ())?;
            let mut bytes = Vec::new();
            let mut checked = 0;
            loop {
                // Grow the buffer in 256-byte steps and read into the
                // not-yet-scanned tail.
                bytes.resize(checked + 256, 0);
                let read = cache.read.read(&mut bytes[checked..]).map_err(|_| ())?;
                if read == 0 {
                    // EOF before the delimiter was found.
                    return Err(());
                }
                // Scan only the newly read bytes for the delimiter.
                if let Some(len) = memchr::memchr(delimiter, &bytes[checked..][..read]) {
                    // Drop the delimiter and everything after it, then cache.
                    bytes.truncate(checked + len);
                    break entry.insert(bytes.into_boxed_slice());
                }
                checked += read;
                // Strings should be relatively small.
                // TODO: make this configurable?
                if checked > 4096 {
                    return Err(());
                }
            }
        }
    };
    // Extend the lifetime to that of self.
    // This is OK because we never mutate or remove entries.
    Ok(unsafe { mem::transmute::<&[u8], &[u8]>(buf) })
}
}

/// An implementation of `ReadRef` for a range of data in a stream that
Expand Down Expand Up @@ -127,4 +168,15 @@ impl<'a, R: Read + Seek> ReadRef<'a> for ReadCacheRange<'a, R> {
let r_offset = self.offset.checked_add(offset).ok_or(())?;
self.r.read_bytes_at(r_offset, size)
}

/// Read a `delimiter`-terminated byte string at `offset` within this range.
///
/// Fails if the offset computation overflows, the underlying read fails,
/// or the resulting string does not lie entirely inside the range.
fn read_bytes_at_until(self, offset: u64, delimiter: u8) -> Result<&'a [u8], ()> {
    // Translate the range-relative offset into an offset in the underlying reader.
    let absolute = self.offset.checked_add(offset).ok_or(())?;
    let found = self.r.read_bytes_at_until(absolute, delimiter)?;
    let len: u64 = found.len().try_into().map_err(|_| ())?;
    // Accept the result only if every byte of it lies within this range.
    match offset.checked_add(len) {
        Some(end) if end <= self.size => Ok(found),
        _ => Err(()),
    }
}
}
20 changes: 20 additions & 0 deletions src/read/read_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ pub trait ReadRef<'a>: Clone + Copy {
/// Returns an error if offset or size are out of bounds.
fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'a [u8]>;

/// Get a reference to a delimited `u8` slice at the given offset.
///
/// Does not include the delimiter.
///
/// Returns an error if offset is out of bounds or the delimiter is
/// not found.
fn read_bytes_at_until(self, offset: u64, delimiter: u8) -> Result<&'a [u8]>;

/// Get a reference to a `u8` slice at the given offset, and update the offset.
///
/// Returns an error if offset or size are out of bounds.
Expand Down Expand Up @@ -110,4 +118,16 @@ impl<'a> ReadRef<'a> for &'a [u8] {
let size: usize = size.try_into().map_err(|_| ())?;
self.get(offset..).ok_or(())?.get(..size).ok_or(())
}

/// Find `delimiter` in the slice starting at `offset` and return the bytes
/// before it; the delimiter itself is excluded.
fn read_bytes_at_until(self, offset: u64, delimiter: u8) -> Result<&'a [u8]> {
    // Convert the offset and take the tail of the slice; either step can fail.
    let start: usize = offset.try_into().map_err(|_| ())?;
    let tail = self.get(start..).ok_or(())?;
    // A missing delimiter is an error.
    let len = memchr::memchr(delimiter, tail).ok_or(())?;
    // `len` is an index inside `tail`, so this slice can never panic.
    Ok(&tail[..len])
}
}

0 comments on commit df31a2c

Please sign in to comment.