diff --git a/crates/examples/src/bin/dyldcachedump.rs b/crates/examples/src/bin/dyldcachedump.rs index bb2df698..7ec484ed 100644 --- a/crates/examples/src/bin/dyldcachedump.rs +++ b/crates/examples/src/bin/dyldcachedump.rs @@ -22,6 +22,7 @@ fn main() { continue; } }; + let subcache_files = open_subcaches_if_exist(&file_path); let file = match unsafe { memmap2::Mmap::map(&file) } { Ok(mmap) => mmap, Err(err) => { @@ -29,7 +30,26 @@ fn main() { continue; } }; - let cache = match DyldCache::::parse(&*file) { + let subcache_files: Option> = subcache_files + .into_iter() + .map( + |subcache_file| match unsafe { memmap2::Mmap::map(&subcache_file) } { + Ok(mmap) => Some(mmap), + Err(err) => { + eprintln!("Failed to map file '{}': {}", file_path, err); + None + } + }, + ) + .collect(); + let subcache_files: Vec<&[u8]> = match &subcache_files { + Some(subcache_files) => subcache_files + .iter() + .map(|subcache_file| &**subcache_file) + .collect(), + None => continue, + }; + let cache = match DyldCache::::parse(&*file, &subcache_files) { Ok(cache) => cache, Err(err) => { println!( @@ -48,3 +68,23 @@ fn main() { } } } + +// If the file is a dyld shared cache, and we're on macOS 12 or later, +// then there will be one or more "subcache" files next to this file, +// with the names filename.1, filename.2, ..., filename.symbols. +fn open_subcaches_if_exist(path: &str) -> Vec { + let mut files = Vec::new(); + for i in 1.. 
{ + let subcache_path = format!("{}.{}", path, i); + match fs::File::open(&subcache_path) { + Ok(subcache_file) => files.push(subcache_file), + Err(_) => break, + }; + } + let symbols_subcache_path = format!("{}.symbols", path); + if let Ok(subcache_file) = fs::File::open(&symbols_subcache_path) { + files.push(subcache_file); + }; + println!("Found {} subcache files", files.len()); + files +} diff --git a/crates/examples/src/bin/objdump.rs b/crates/examples/src/bin/objdump.rs index c7f989b2..18cc48ae 100644 --- a/crates/examples/src/bin/objdump.rs +++ b/crates/examples/src/bin/objdump.rs @@ -18,6 +18,7 @@ fn main() { process::exit(1); } }; + let extra_files = open_subcaches_if_exist(&file_path); let file = match unsafe { memmap2::Mmap::map(&file) } { Ok(mmap) => mmap, Err(err) => { @@ -25,8 +26,51 @@ fn main() { process::exit(1); } }; + let extra_files: Vec<_> = extra_files + .into_iter() + .map( + |subcache_file| match unsafe { memmap2::Mmap::map(&subcache_file) } { + Ok(mmap) => mmap, + Err(err) => { + eprintln!("Failed to map file '{}': {}", file_path, err,); + process::exit(1); + } + }, + ) + .collect(); + let extra_file_data: Vec<&[u8]> = extra_files.iter().map(|f| &**f).collect(); let stdout = io::stdout(); let stderr = io::stderr(); - objdump::print(&mut stdout.lock(), &mut stderr.lock(), &*file, member_names).unwrap(); + objdump::print( + &mut stdout.lock(), + &mut stderr.lock(), + &*file, + &extra_file_data, + member_names, + ) + .unwrap(); +} + +// If the file is a dyld shared cache, and we're on macOS 12 or later, +// then there will be one or more "subcache" files next to this file, +// with the names filename.1, filename.2 etc. +// Read those files now, if they exist, even if we don't know that +// we're dealing with a dyld shared cache. By the time we know what +// we're dealing with, it's too late to read more files. +fn open_subcaches_if_exist(path: &str) -> Vec { + let mut files = Vec::new(); + for i in 1.. 
{ + let subcache_path = format!("{}.{}", path, i); + match fs::File::open(&subcache_path) { + Ok(subcache_file) => files.push(subcache_file), + Err(_) => break, + }; + } + let symbols_subcache_path = format!("{}.symbols", path); + if let Ok(subcache_file) = fs::File::open(&symbols_subcache_path) { + files.push(subcache_file); + }; + println!("have {} extra files", files.len()); + files } diff --git a/crates/examples/src/objdump.rs b/crates/examples/src/objdump.rs index e01b88eb..1dd672b6 100644 --- a/crates/examples/src/objdump.rs +++ b/crates/examples/src/objdump.rs @@ -7,6 +7,7 @@ pub fn print( w: &mut W, e: &mut E, file: &[u8], + extra_files: &[&[u8]], member_names: Vec, ) -> Result<()> { let mut member_names: Vec<_> = member_names.into_iter().map(|name| (name, false)).collect(); @@ -47,7 +48,7 @@ pub fn print( Err(err) => writeln!(e, "Failed to parse Fat 64 data: {}", err)?, } } - } else if let Ok(cache) = DyldCache::::parse(&*file) { + } else if let Ok(cache) = DyldCache::::parse(&*file, extra_files) { writeln!(w, "Format: dyld cache {:?}-endian", cache.endianness())?; writeln!(w, "Architecture: {:?}", cache.architecture())?; for image in cache.images() { diff --git a/crates/examples/tests/testfiles.rs b/crates/examples/tests/testfiles.rs index 6fe22ee0..8e854571 100644 --- a/crates/examples/tests/testfiles.rs +++ b/crates/examples/tests/testfiles.rs @@ -28,7 +28,7 @@ fn testfiles() { println!("File {}", path); let data = fs::read(&path).unwrap(); fail |= testfile(path, &data, "objdump", |mut out, mut err, data| { - objdump::print(&mut out, &mut err, data, vec![]).unwrap() + objdump::print(&mut out, &mut err, data, &[], vec![]).unwrap() }); fail |= testfile(path, &data, "readobj", readobj::print); println!(); diff --git a/src/macho.rs b/src/macho.rs index 9d81f7d6..86a5bf62 100644 --- a/src/macho.rs +++ b/src/macho.rs @@ -284,26 +284,67 @@ pub const VM_PROT_EXECUTE: u32 = 0x04; // Definitions from 
https://opensource.apple.com/source/dyld/dyld-210.2.3/launch-cache/dyld_cache_format.h.auto.html -/// The dyld cache header, containing only the fields which are present -/// in all versions of dyld caches (dyld-95.3 and up). -/// Many more fields exist in later dyld versions, but we currently do -/// not need to parse those. +/// The dyld cache header. /// Corresponds to struct dyld_cache_header from dyld_cache_format.h. +/// This header has grown over time. Only the fields up to and including dyld_base_address +/// are guaranteed to be present. For all other fields, check the header size before +/// accessing the field. The header size is stored in mapping_offset; the mappings start +/// right after the header. #[derive(Debug, Clone, Copy)] #[repr(C)] pub struct DyldCacheHeader { /// e.g. "dyld_v0 i386" pub magic: [u8; 16], /// file offset to first dyld_cache_mapping_info - pub mapping_offset: U32, + pub mapping_offset: U32, // offset: 0x10 /// number of dyld_cache_mapping_info entries - pub mapping_count: U32, + pub mapping_count: U32, // offset: 0x14 /// file offset to first dyld_cache_image_info - pub images_offset: U32, + pub images_offset: U32, // offset: 0x18 /// number of dyld_cache_image_info entries - pub images_count: U32, + pub images_count: U32, // offset: 0x1c /// base address of dyld when cache was built - pub dyld_base_address: U64, + pub dyld_base_address: U64, // offset: 0x20 + /// + reserved1: [u8; 32], // offset: 0x28 + /// file offset of where local symbols are stored + pub local_symbols_offset: U64, // offset: 0x48 + /// size of local symbols information + pub local_symbols_size: U64, // offset: 0x50 + /// unique value for each shared cache file + pub uuid: [u8; 16], // offset: 0x58 + /// + reserved2: [u8; 32], // offset: 0x68 + /// + reserved3: [u8; 32], // offset: 0x88 + /// + reserved4: [u8; 32], // offset: 0xa8 + /// + reserved5: [u8; 32], // offset: 0xc8 + /// + reserved6: [u8; 32], // offset: 0xe8 + /// + reserved7: [u8; 32], // 
offset: 0x108 + /// + reserved8: [u8; 32], // offset: 0x128 + /// + reserved9: [u8; 32], // offset: 0x148 + /// + reserved10: [u8; 32], // offset: 0x168 + /// file offset to first dyld_subcache_info + pub subcaches_offset: U32, // offset: 0x188 + /// number of dyld_subcache_info entries + pub subcaches_count: U32, // offset: 0x18c + /// the UUID of the .symbols subcache + pub symbols_subcache_uuid: [u8; 16], // offset: 0x190 + /// + reserved11: [u8; 32], // offset: 0x1a0 + /// file offset to first dyld_cache_image_info + /// Use this instead of images_offset if mapping_offset is at least 0x1c4. + pub images_across_all_subcaches_offset: U32, // offset: 0x1c0 + /// number of dyld_cache_image_info entries + /// Use this instead of images_count if mapping_offset is at least 0x1c4. + pub images_across_all_subcaches_count: U32, // offset: 0x1c4 } /// Corresponds to struct dyld_cache_mapping_info from dyld_cache_format.h. @@ -338,6 +379,17 @@ pub struct DyldCacheImageInfo { pub pad: U32, } +/// Corresponds to a struct whose source code has not been published as of Nov 2021. +/// Added in the dyld cache version which shipped with macOS 12 / iOS 15. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct DyldSubCacheInfo { + /// The UUID of this subcache. + pub uuid: [u8; 16], + /// The size of this subcache plus all previous subcaches. + pub cumulative_size: U64, +} + // Definitions from "/usr/include/mach-o/loader.h". /* @@ -3199,6 +3251,7 @@ unsafe_impl_endian_pod!( DyldCacheHeader, DyldCacheMappingInfo, DyldCacheImageInfo, + DyldSubCacheInfo, MachHeader32, MachHeader64, LoadCommand, diff --git a/src/read/any.rs b/src/read/any.rs index 940cf278..ce99bc31 100644 --- a/src/read/any.rs +++ b/src/read/any.rs @@ -20,7 +20,7 @@ use crate::read::{ SymbolMapName, SymbolScope, SymbolSection, }; #[allow(unused_imports)] -use crate::Endianness; +use crate::{AddressSize, Endian, Endianness}; /// Evaluate an expression on the contents of a file format enum. 
/// @@ -220,23 +220,21 @@ impl<'data, R: ReadRef<'data>> File<'data, R> { Ok(File { inner }) } - /// Parse the raw file data at an arbitrary offset inside the input data. - /// - /// Currently, this is only supported for Mach-O images. - /// This can be used for parsing Mach-O images inside the dyld shared cache, - /// where multiple images, located at different offsets, share the same address - /// space. - pub fn parse_at(data: R, offset: u64) -> Result { - let _inner = match FileKind::parse_at(data, offset)? { - #[cfg(feature = "macho")] - FileKind::MachO32 => FileInternal::MachO32(macho::MachOFile32::parse_at(data, offset)?), - #[cfg(feature = "macho")] - FileKind::MachO64 => FileInternal::MachO64(macho::MachOFile64::parse_at(data, offset)?), - #[allow(unreachable_patterns)] + /// Parse a Mach-O image from the dyld shared cache. + #[cfg(feature = "macho")] + pub fn parse_dyld_cache_image<'cache, E: Endian>( + image: &macho::DyldCacheImage<'data, 'cache, E, R>, + ) -> Result { + let inner = match image.cache.architecture().address_size() { + Some(AddressSize::U64) => { + FileInternal::MachO64(macho::MachOFile64::parse_dyld_cache_image(image)?) + } + Some(AddressSize::U32) => { + FileInternal::MachO32(macho::MachOFile32::parse_dyld_cache_image(image)?) + } _ => return Err(Error("Unsupported file format")), }; - #[allow(unreachable_code)] - Ok(File { inner: _inner }) + Ok(File { inner }) } /// Return the file format. 
diff --git a/src/read/macho/dyld_cache.rs b/src/read/macho/dyld_cache.rs index ee758ce0..52a2f7c0 100644 --- a/src/read/macho/dyld_cache.rs +++ b/src/read/macho/dyld_cache.rs @@ -1,3 +1,4 @@ +use alloc::vec::Vec; use core::slice; use crate::read::{Error, File, ReadError, ReadRef, Result}; @@ -12,26 +13,89 @@ where { endian: E, data: R, + subcaches: Vec>, + symbols_subcache: Option>, header: &'data macho::DyldCacheHeader, mappings: &'data [macho::DyldCacheMappingInfo], images: &'data [macho::DyldCacheImageInfo], arch: Architecture, } +/// Information about a subcache. +#[derive(Debug)] +pub struct DyldSubCache<'data, E = Endianness, R = &'data [u8]> +where + E: Endian, + R: ReadRef<'data>, +{ + data: R, + mappings: &'data [macho::DyldCacheMappingInfo], +} + +// This is the offset of the images_across_all_subcaches_count field. +const MIN_HEADER_SIZE_SUBCACHES: u32 = 0x1c4; + impl<'data, E, R> DyldCache<'data, E, R> where E: Endian, R: ReadRef<'data>, { /// Parse the raw dyld shared cache data. - pub fn parse(data: R) -> Result { + /// For shared caches from macOS 12 / iOS 15 and above, the subcache files need to be + /// supplied as well, in the correct order, with the .symbols subcache last (if present). + /// For example, data would be the data for dyld_shared_cache_x86_64, + /// and subcache_data would be the data for [dyld_shared_cache_x86_64.1, dyld_shared_cache_x86_64.2, ...] + pub fn parse(data: R, subcache_data: &[R]) -> Result { let header = macho::DyldCacheHeader::parse(data)?; let (arch, endian) = header.parse_magic()?; let mappings = header.mappings(endian, data)?; + + let symbols_subcache_uuid = header.symbols_subcache_uuid(endian); + let subcaches_info = header.subcaches(endian, data)?.unwrap_or(&[]); + + if subcache_data.len() != subcaches_info.len() + symbols_subcache_uuid.is_some() as usize { + return Err(Error("Incorrect number of SubCaches")); + } + + // Split out the .symbols subcache data from the other subcaches. 
+ let (symbols_subcache_data_and_uuid, subcache_data) = + if let Some(symbols_uuid) = symbols_subcache_uuid { + let (sym_data, rest_data) = subcache_data.split_last().unwrap(); + (Some((*sym_data, symbols_uuid)), rest_data) + } else { + (None, subcache_data) + }; + + // Read the regular SubCaches (.1, .2, ...), if present. + let mut subcaches = Vec::new(); + for (&data, info) in subcache_data.iter().zip(subcaches_info.iter()) { + let sc_header = macho::DyldCacheHeader::::parse(data)?; + if sc_header.uuid != info.uuid { + return Err(Error("Unexpected SubCache UUID")); + } + let mappings = sc_header.mappings(endian, data)?; + subcaches.push(DyldSubCache { data, mappings }); + } + + // Read the .symbols SubCache, if present. + let symbols_subcache = match symbols_subcache_data_and_uuid { + Some((data, uuid)) => { + let sc_header = macho::DyldCacheHeader::::parse(data)?; + if sc_header.uuid != uuid { + return Err(Error("Unexpected .symbols SubCache UUID")); + } + let mappings = sc_header.mappings(endian, data)?; + Some(DyldSubCache { data, mappings }) + } + None => None, + }; + let images = header.images(endian, data)?; Ok(DyldCache { endian, data, + subcaches, + symbols_subcache, header, mappings, images, @@ -66,6 +130,22 @@ where iter: self.images.iter(), } } + + /// Find the address in a mapping and return the cache or subcache data it was found in, + /// together with the translated file offset. + pub fn data_and_offset_for_address(&self, address: u64) -> Option<(R, u64)> { + if let Some(file_offset) = address_to_file_offset(address, self.endian, self.mappings) { + return Some((self.data, file_offset)); + } + for subcache in &self.subcaches { + if let Some(file_offset) = + address_to_file_offset(address, self.endian, subcache.mappings) + { + return Some((subcache.data, file_offset)); + } + } + None + } } /// An iterator over all the images (dylibs) in the dyld shared cache. 
@@ -84,14 +164,12 @@ where E: Endian, R: ReadRef<'data>, { - type Item = DyldCacheImage<'data, E, R>; + type Item = DyldCacheImage<'data, 'cache, E, R>; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { let image_info = self.iter.next()?; Some(DyldCacheImage { - endian: self.cache.endian, - data: self.cache.data, - mappings: self.cache.mappings, + cache: self.cache, image_info, }) } @@ -99,38 +177,39 @@ where /// One image (dylib) from inside the dyld shared cache. #[derive(Debug)] -pub struct DyldCacheImage<'data, E = Endianness, R = &'data [u8]> +pub struct DyldCacheImage<'data, 'cache, E = Endianness, R = &'data [u8]> where E: Endian, R: ReadRef<'data>, { - endian: E, - data: R, - mappings: &'data [macho::DyldCacheMappingInfo], + pub(crate) cache: &'cache DyldCache<'data, E, R>, image_info: &'data macho::DyldCacheImageInfo, } -impl<'data, E, R> DyldCacheImage<'data, E, R> +impl<'data, 'cache, E, R> DyldCacheImage<'data, 'cache, E, R> where E: Endian, R: ReadRef<'data>, { /// The file system path of this image. pub fn path(&self) -> Result<&'data str> { - let path = self.image_info.path(self.endian, self.data)?; + let path = self.image_info.path(self.cache.endian, self.cache.data)?; // The path should always be ascii, so from_utf8 should alway succeed. let path = core::str::from_utf8(path).map_err(|_| Error("Path string not valid utf-8"))?; Ok(path) } /// The offset in the dyld cache file where this image starts. - pub fn file_offset(&self) -> Result { - self.image_info.file_offset(self.endian, self.mappings) + pub fn image_data_and_offset(&self) -> Result<(R, u64)> { + let address = self.image_info.address.get(self.cache.endian); + self.cache + .data_and_offset_for_address(address) + .ok_or(Error("Address not found in any mapping")) } /// Parse this image into an Object. pub fn parse_object(&self) -> Result> { - File::parse_at(self.data, self.file_offset()?) 
+ File::parse_dyld_cache_image(self) } } @@ -175,17 +254,55 @@ impl macho::DyldCacheHeader { .read_error("Invalid dyld cache mapping size or alignment") } + /// Return the information about subcaches, if present. + pub fn subcaches<'data, R: ReadRef<'data>>( + &self, + endian: E, + data: R, + ) -> Result]>> { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + let subcaches = data + .read_slice_at::>( + self.subcaches_offset.get(endian).into(), + self.subcaches_count.get(endian) as usize, + ) + .read_error("Invalid dyld subcaches size or alignment")?; + Ok(Some(subcaches)) + } else { + Ok(None) + } + } + + /// Return the UUID for the .symbols subcache, if present. + pub fn symbols_subcache_uuid(&self, endian: E) -> Option<[u8; 16]> { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + let uuid = self.symbols_subcache_uuid; + if uuid != [0; 16] { + return Some(uuid); + } + } + None + } + /// Return the image information table. pub fn images<'data, R: ReadRef<'data>>( &self, endian: E, data: R, ) -> Result<&'data [macho::DyldCacheImageInfo]> { - data.read_slice_at::>( - self.images_offset.get(endian).into(), - self.images_count.get(endian) as usize, - ) - .read_error("Invalid dyld cache image size or alignment") + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + data.read_slice_at::>( + self.images_across_all_subcaches_offset.get(endian).into(), + self.images_across_all_subcaches_count.get(endian) as usize, + ) + .read_error("Invalid dyld cache image size or alignment") + } else { + data.read_slice_at::>( + self.images_offset.get(endian).into(), + self.images_count.get(endian) as usize, + ) + .read_error("Invalid dyld cache image size or alignment") + } } } @@ -205,14 +322,26 @@ impl macho::DyldCacheImageInfo { mappings: &[macho::DyldCacheMappingInfo], ) -> Result { let address = self.address.get(endian); - for mapping in mappings { - let mapping_address = mapping.address.get(endian); - if address >= 
mapping_address - && address < mapping_address.wrapping_add(mapping.size.get(endian)) - { - return Ok(address - mapping_address + mapping.file_offset.get(endian)); - } + match address_to_file_offset(address, endian, mappings) { + Some(file_offset) => Ok(file_offset), + None => Err(Error("Invalid dyld cache image address")), + } + } +} + +/// Find the file offset of the image by looking up its address in the mappings. +pub fn address_to_file_offset( + address: u64, + endian: E, + mappings: &[macho::DyldCacheMappingInfo], +) -> Option { + for mapping in mappings { + let mapping_address = mapping.address.get(endian); + if address >= mapping_address + && address < mapping_address.wrapping_add(mapping.size.get(endian)) + { + return Some(address - mapping_address + mapping.file_offset.get(endian)); } - Err(Error("Invalid dyld cache image address")) } + None } diff --git a/src/read/macho/file.rs b/src/read/macho/file.rs index 0d4961b1..559f001a 100644 --- a/src/read/macho/file.rs +++ b/src/read/macho/file.rs @@ -10,9 +10,9 @@ use crate::read::{ use crate::{endian, macho, BigEndian, ByteString, Endian, Endianness, Pod}; use super::{ - LoadCommandIterator, MachOSection, MachOSectionInternal, MachOSectionIterator, MachOSegment, - MachOSegmentIterator, MachOSymbol, MachOSymbolIterator, MachOSymbolTable, Nlist, Section, - Segment, SymbolTable, + DyldCacheImage, LoadCommandIterator, MachOSection, MachOSectionInternal, MachOSectionIterator, + MachOSegment, MachOSegmentInternal, MachOSegmentIterator, MachOSymbol, MachOSymbolIterator, + MachOSymbolTable, Nlist, Section, Segment, SymbolTable, }; /// A 32-bit Mach-O object file. @@ -35,6 +35,7 @@ where pub(super) data: R, pub(super) header_offset: u64, pub(super) header: &'data Mach, + pub(super) segments: Vec>, pub(super) sections: Vec>, pub(super) symbols: SymbolTable<'data, Mach, R>, } @@ -46,38 +47,95 @@ where { /// Parse the raw Mach-O file data. 
pub fn parse(data: R) -> Result { - Self::parse_at(data, 0) + let header = Mach::parse(data, 0)?; + let endian = header.endian()?; + + // Build a list of segments and sections to make some operations more efficient. + let mut segments = Vec::new(); + let mut sections = Vec::new(); + let mut symbols = SymbolTable::default(); + if let Ok(mut commands) = header.load_commands(endian, data, 0) { + while let Ok(Some(command)) = commands.next() { + if let Some((segment, section_data)) = Mach::Segment::from_command(command)? { + let segment_index = segments.len(); + segments.push(MachOSegmentInternal { segment, data }); + for section in segment.sections(endian, section_data)? { + let index = SectionIndex(sections.len() + 1); + sections.push(MachOSectionInternal::parse(index, segment_index, section)); + } + } else if let Some(symtab) = command.symtab()? { + symbols = symtab.symbols(endian, data)?; + } + } + } + + Ok(MachOFile { + endian, + data, + header_offset: 0, + header, + segments, + sections, + symbols, + }) } - /// Parse the raw Mach-O file data at an arbitrary offset inside the input data. - /// This can be used for parsing Mach-O images inside the dyld shared cache, - /// where multiple images, located at different offsets, share the same address - /// space. - pub fn parse_at(data: R, header_offset: u64) -> Result { + /// Parse the Mach-O file for the given image from the dyld shared cache. + /// This will read different sections from different subcaches, if necessary. + pub fn parse_dyld_cache_image<'cache, E: Endian>( + image: &DyldCacheImage<'data, 'cache, E, R>, + ) -> Result { + let (data, header_offset) = image.image_data_and_offset()?; let header = Mach::parse(data, header_offset)?; let endian = header.endian()?; - let mut symbols = SymbolTable::default(); // Build a list of sections to make some operations more efficient. + // Also build a list of segments, because we need to remember which ReadRef + // to read each section's data from. 
Only the DyldCache knows this information, + // and we won't have access to it once we've exited this function. + let mut segments = Vec::new(); let mut sections = Vec::new(); + let mut linkedit_data: Option = None; + let mut symtab = None; if let Ok(mut commands) = header.load_commands(endian, data, header_offset) { while let Ok(Some(command)) = commands.next() { if let Some((segment, section_data)) = Mach::Segment::from_command(command)? { + // Each segment can be stored in a different subcache. Get the segment's + // address and look it up in the cache mappings, to find the correct cache data. + let addr = segment.vmaddr(endian).into(); + let (data, _offset) = image + .cache + .data_and_offset_for_address(addr) + .read_error("Could not find segment data in dyld shared cache")?; + if segment.name() == macho::SEG_LINKEDIT.as_bytes() { + linkedit_data = Some(data); + } + let segment_index = segments.len(); + segments.push(MachOSegmentInternal { segment, data }); + for section in segment.sections(endian, section_data)? { let index = SectionIndex(sections.len() + 1); - sections.push(MachOSectionInternal::parse(index, section)); + sections.push(MachOSectionInternal::parse(index, segment_index, section)); } - } else if let Some(symtab) = command.symtab()? { - symbols = symtab.symbols(endian, data)?; + } else if let Some(st) = command.symtab()? { + symtab = Some(st); } } } + // The symbols are found in the __LINKEDIT segment, so make sure to read them from the + // correct subcache. 
+ let symbols = match (symtab, linkedit_data) { + (Some(symtab), Some(linkedit_data)) => symtab.symbols(endian, linkedit_data)?, + _ => SymbolTable::default(), + }; + Ok(MachOFile { endian, data, header_offset, header, + segments, sections, symbols, }) @@ -95,6 +153,15 @@ where .and_then(|index| self.sections.get(index)) .read_error("Invalid Mach-O section index") } + + pub(super) fn segment_internal( + &self, + index: usize, + ) -> Result<&MachOSegmentInternal<'data, Mach, R>> { + self.segments + .get(index) + .read_error("Invalid Mach-O segment index") + } } impl<'data, Mach, R> read::private::Sealed for MachOFile<'data, Mach, R> @@ -155,11 +222,7 @@ where fn segments(&'file self) -> MachOSegmentIterator<'data, 'file, Mach, R> { MachOSegmentIterator { file: self, - commands: self - .header - .load_commands(self.endian, self.data, self.header_offset) - .ok() - .unwrap_or_else(Default::default), + iter: self.segments.iter(), } } diff --git a/src/read/macho/section.rs b/src/read/macho/section.rs index 3a5a22eb..9e71aa8f 100644 --- a/src/read/macho/section.rs +++ b/src/read/macho/section.rs @@ -80,9 +80,11 @@ where R: ReadRef<'data>, { fn bytes(&self) -> Result<&'data [u8]> { + let segment_index = self.internal.segment_index; + let segment = self.file.segment_internal(segment_index)?; self.internal .section - .data(self.file.endian, self.file.data) + .data(self.file.endian, segment.data) .read_error("Invalid Mach-O section size or offset") } } @@ -202,12 +204,17 @@ where #[derive(Debug, Clone, Copy)] pub(super) struct MachOSectionInternal<'data, Mach: MachHeader> { pub index: SectionIndex, + pub segment_index: usize, pub kind: SectionKind, pub section: &'data Mach::Section, } impl<'data, Mach: MachHeader> MachOSectionInternal<'data, Mach> { - pub(super) fn parse(index: SectionIndex, section: &'data Mach::Section) -> Self { + pub(super) fn parse( + index: SectionIndex, + segment_index: usize, + section: &'data Mach::Section, + ) -> Self { // TODO: we don't validate 
flags, should we? let kind = match (section.segment_name(), section.name()) { (b"__TEXT", b"__text") => SectionKind::Text, @@ -230,6 +237,7 @@ impl<'data, Mach: MachHeader> MachOSectionInternal<'data, Mach> { }; MachOSectionInternal { index, + segment_index, kind, section, } diff --git a/src/read/macho/segment.rs b/src/read/macho/segment.rs index 3c2d9649..3a09379d 100644 --- a/src/read/macho/segment.rs +++ b/src/read/macho/segment.rs @@ -1,12 +1,12 @@ use core::fmt::Debug; -use core::{result, str}; +use core::{result, slice, str}; use crate::endian::{self, Endianness}; use crate::macho; use crate::pod::Pod; use crate::read::{self, ObjectSegment, ReadError, ReadRef, Result}; -use super::{LoadCommandData, LoadCommandIterator, MachHeader, MachOFile, Section}; +use super::{LoadCommandData, MachHeader, MachOFile, Section}; /// An iterator over the segments of a `MachOFile32`. pub type MachOSegmentIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = @@ -24,7 +24,7 @@ where R: ReadRef<'data>, { pub(super) file: &'file MachOFile<'data, Mach, R>, - pub(super) commands: LoadCommandIterator<'data, Mach::Endian>, + pub(super) iter: slice::Iter<'file, MachOSegmentInternal<'data, Mach, R>>, } impl<'data, 'file, Mach, R> Iterator for MachOSegmentIterator<'data, 'file, Mach, R> @@ -35,15 +35,10 @@ where type Item = MachOSegment<'data, 'file, Mach, R>; fn next(&mut self) -> Option { - loop { - let command = self.commands.next().ok()??; - if let Ok(Some((segment, _))) = Mach::Segment::from_command(command) { - return Some(MachOSegment { - file: self.file, - segment, - }); - } - } + self.iter.next().map(|internal| MachOSegment { + file: self.file, + internal, + }) } } @@ -63,7 +58,7 @@ where R: ReadRef<'data>, { file: &'file MachOFile<'data, Mach, R>, - segment: &'data Mach::Segment, + internal: &'file MachOSegmentInternal<'data, Mach, R>, } impl<'data, 'file, Mach, R> MachOSegment<'data, 'file, Mach, R> @@ -72,7 +67,8 @@ where R: ReadRef<'data>, { fn bytes(&self) -> 
Result<&'data [u8]> { - self.segment + self.internal + .segment .data(self.file.endian, self.file.data) .read_error("Invalid Mach-O segment size or offset") } @@ -92,12 +88,12 @@ where { #[inline] fn address(&self) -> u64 { - self.segment.vmaddr(self.file.endian).into() + self.internal.segment.vmaddr(self.file.endian).into() } #[inline] fn size(&self) -> u64 { - self.segment.vmsize(self.file.endian).into() + self.internal.segment.vmsize(self.file.endian).into() } #[inline] @@ -108,7 +104,7 @@ where #[inline] fn file_range(&self) -> (u64, u64) { - self.segment.file_range(self.file.endian) + self.internal.segment.file_range(self.file.endian) } fn data(&self) -> Result<&'data [u8]> { @@ -126,19 +122,25 @@ where #[inline] fn name_bytes(&self) -> Result> { - Ok(Some(self.segment.name())) + Ok(Some(self.internal.segment.name())) } #[inline] fn name(&self) -> Result> { Ok(Some( - str::from_utf8(self.segment.name()) + str::from_utf8(self.internal.segment.name()) .ok() .read_error("Non UTF-8 Mach-O segment name")?, )) } } +#[derive(Debug, Clone, Copy)] +pub(super) struct MachOSegmentInternal<'data, Mach: MachHeader, R: ReadRef<'data>> { + pub data: R, + pub segment: &'data Mach::Segment, +} + /// A trait for generic access to `SegmentCommand32` and `SegmentCommand64`. #[allow(missing_docs)] pub trait Segment: Debug + Pod {