Skip to content

Commit f4cf58f

Browse files
committed
read: add read_bytes_at_until
Use this for dyld cache image paths. The main benefit is avoiding an artificial limit (previously 256 bytes) on the path length for mapped data.
1 parent 3c0504b commit f4cf58f

File tree

3 files changed

+75
-11
lines changed

3 files changed

+75
-11
lines changed

src/read/macho/dyld_cache.rs

+3-11
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use core::slice;
22

33
use crate::read::{Error, File, ReadError, ReadRef, Result};
4-
use crate::{macho, Architecture, Bytes, Endian, Endianness};
4+
use crate::{macho, Architecture, Endian, Endianness};
55

66
/// A parsed representation of the dyld shared cache.
77
#[derive(Debug)]
@@ -192,16 +192,8 @@ impl<E: Endian> macho::DyldCacheHeader<E> {
192192
impl<E: Endian> macho::DyldCacheImageInfo<E> {
193193
/// The file system path of this image.
194194
pub fn path<'data, R: ReadRef<'data>>(&self, endian: E, data: R) -> Result<&'data [u8]> {
195-
// The longest path I've seen is 164 bytes long. In theory paths could be longer than 256.
196-
const MAX_PATH_LEN: u64 = 256;
197-
198-
let path_offset = self.path_file_offset.get(endian).into();
199-
let slice_containing_path = data
200-
.read_bytes_at(path_offset, MAX_PATH_LEN)
201-
.read_error("Couldn't read path")?;
202-
Bytes(slice_containing_path)
203-
.read_string()
204-
.read_error("Couldn't read path string (didn't find nul byte within first 256 bytes)")
195+
data.read_bytes_at_until(self.path_file_offset.get(endian).into(), 0)
196+
.read_error("Couldn't read dyld cache image path")
205197
}
206198

207199
/// Find the file offset of the image by looking up its address in the mappings.

src/read/read_cache.rs

+52
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::collections::HashMap;
55
use std::convert::TryInto;
66
use std::io::{Read, Seek, SeekFrom};
77
use std::mem;
8+
use std::vec::Vec;
89

910
use crate::read::ReadRef;
1011

@@ -24,6 +25,7 @@ pub struct ReadCache<R: Read + Seek> {
2425
struct ReadCacheInternal<R: Read + Seek> {
2526
read: R,
2627
bufs: HashMap<(u64, u64), Box<[u8]>>,
28+
strings: HashMap<(u64, u8), Box<[u8]>>,
2729
}
2830

2931
impl<R: Read + Seek> ReadCache<R> {
@@ -33,6 +35,7 @@ impl<R: Read + Seek> ReadCache<R> {
3335
cache: RefCell::new(ReadCacheInternal {
3436
read,
3537
bufs: HashMap::new(),
38+
strings: HashMap::new(),
3639
}),
3740
}
3841
}
@@ -86,6 +89,44 @@ impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache<R> {
8689
// This is OK because we never mutate or remove entries.
8790
Ok(unsafe { mem::transmute::<&[u8], &[u8]>(buf) })
8891
}
92+
93+
fn read_bytes_at_until(self, offset: u64, delimiter: u8) -> Result<&'a [u8], ()> {
94+
let cache = &mut *self.cache.borrow_mut();
95+
let buf = match cache.strings.entry((offset, delimiter)) {
96+
Entry::Occupied(entry) => entry.into_mut(),
97+
Entry::Vacant(entry) => {
98+
cache
99+
.read
100+
.seek(SeekFrom::Start(offset as u64))
101+
.map_err(|_| ())?;
102+
let mut bytes = Vec::new();
103+
let mut checked = 0;
104+
loop {
105+
bytes.resize(checked + 256, 0);
106+
let read = cache.read.read(&mut bytes[checked..]).map_err(|_| ())?;
107+
if read == 0 {
108+
return Err(());
109+
}
110+
match memchr::memchr(delimiter, &bytes[checked..][..read]) {
111+
Some(len) => {
112+
bytes.truncate(checked + len);
113+
break entry.insert(bytes.into_boxed_slice());
114+
}
115+
None => {}
116+
}
117+
checked += read;
118+
// Strings should be relatively small.
119+
// TODO: make this configurable?
120+
if checked > 4096 {
121+
return Err(());
122+
}
123+
}
124+
}
125+
};
126+
// Extend the lifetime to that of self.
127+
// This is OK because we never mutate or remove entries.
128+
Ok(unsafe { mem::transmute::<&[u8], &[u8]>(buf) })
129+
}
89130
}
90131

91132
/// An implementation of `ReadRef` for a range of data in a stream that
@@ -127,4 +168,15 @@ impl<'a, R: Read + Seek> ReadRef<'a> for ReadCacheRange<'a, R> {
127168
let r_offset = self.offset.checked_add(offset).ok_or(())?;
128169
self.r.read_bytes_at(r_offset, size)
129170
}
171+
172+
fn read_bytes_at_until(self, offset: u64, delimiter: u8) -> Result<&'a [u8], ()> {
173+
let r_offset = self.offset.checked_add(offset).ok_or(())?;
174+
let bytes = self.r.read_bytes_at_until(r_offset, delimiter)?;
175+
let size = bytes.len().try_into().map_err(|_| ())?;
176+
let end = offset.checked_add(size).ok_or(())?;
177+
if end > self.size {
178+
return Err(());
179+
}
180+
Ok(bytes)
181+
}
130182
}

src/read/read_ref.rs

+20
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,14 @@ pub trait ReadRef<'a>: Clone + Copy {
4040
/// Returns an error if offset or size are out of bounds.
4141
fn read_bytes_at(self, offset: u64, size: u64) -> Result<&'a [u8]>;
4242

43+
/// Get a reference to a delimited `u8` slice at the given offset.
44+
///
45+
/// Does not include the delimiter.
46+
///
47+
/// Returns an error if offset is out of bounds or the delimiter is
48+
/// not found.
49+
fn read_bytes_at_until(self, offset: u64, delimiter: u8) -> Result<&'a [u8]>;
50+
4351
/// Get a reference to a `u8` slice at the given offset, and update the offset.
4452
///
4553
/// Returns an error if offset or size are out of bounds.
@@ -110,4 +118,16 @@ impl<'a> ReadRef<'a> for &'a [u8] {
110118
let size: usize = size.try_into().map_err(|_| ())?;
111119
self.get(offset..).ok_or(())?.get(..size).ok_or(())
112120
}
121+
122+
fn read_bytes_at_until(self, offset: u64, delimiter: u8) -> Result<&'a [u8]> {
123+
let offset: usize = offset.try_into().map_err(|_| ())?;
124+
let bytes = self.get(offset..).ok_or(())?;
125+
match memchr::memchr(delimiter, bytes) {
126+
Some(len) => {
127+
// This will never fail.
128+
bytes.get(..len).ok_or(())
129+
}
130+
None => Err(()),
131+
}
132+
}
113133
}

0 commit comments

Comments
 (0)