From af1780b1270777be0c3d2092c0bbc72458441ebf Mon Sep 17 00:00:00 2001 From: Markus Stange Date: Sat, 6 Nov 2021 13:06:58 -0400 Subject: [PATCH] Add support for split dyld shared cache. Fixes #358. This adds support for the dyld cache format that is used on macOS 12 and iOS 15. The cache is split over multiple files, with a "root" cache and one or more subcaches, for example: ``` /System/Library/dyld/dyld_shared_cache_x86_64 /System/Library/dyld/dyld_shared_cache_x86_64.1 /System/Library/dyld/dyld_shared_cache_x86_64.2 /System/Library/dyld/dyld_shared_cache_x86_64.3 ``` Additionally, on iOS, there is a separate .symbols subcache, which contains local symbols. Each file has a set of mappings. For each image in the cache, the segments of that image can be distributed over multiple files: For example, on macOS 12.0.1, the image for libsystem_malloc.dylib for the arm64e architecture has its __TEXT segment in the root cache and the __LINKEDIT segment in the .1 subcache - there's a single __LINKEDIT segment which is shared between all images across both files. The remaining libsystem_malloc.dylib segments are in the same file as the __TEXT segment. The DyldCache API now requires the data for all subcaches to be supplied to the constructor. The parse_at methods have been removed and replaced with a parse_dyld_cache_image method. With this patch, the following command outputs correct symbols for libsystem_malloc.dylib: ``` cargo run --release --bin objdump -- /System/Library/dyld/dyld_shared_cache_arm64e /usr/lib/system/libsystem_malloc.dylib ``` Support for local symbols is not implemented. But, as a first step, DyldCache::parse requires the .symbols subcache to be supplied (if the root cache expects one to be present) and checks that its UUID is correct. MachOFile doesn't do anything with ilocalsym and nlocalsym yet, and we don't yet have the struct definitions for dyld_cache_local_symbols_info and dyld_cache_local_symbols_entry. 
--- crates/examples/src/bin/dyldcachedump.rs | 42 ++++- crates/examples/src/bin/objdump.rs | 46 +++++- crates/examples/src/objdump.rs | 3 +- crates/examples/tests/testfiles.rs | 2 +- src/macho.rs | 71 +++++++-- src/read/any.rs | 30 ++-- src/read/macho/dyld_cache.rs | 185 +++++++++++++++++++---- src/read/macho/file.rs | 99 +++++++++--- src/read/macho/section.rs | 12 +- src/read/macho/segment.rs | 40 ++--- 10 files changed, 434 insertions(+), 96 deletions(-) diff --git a/crates/examples/src/bin/dyldcachedump.rs b/crates/examples/src/bin/dyldcachedump.rs index bb2df698..7ec484ed 100644 --- a/crates/examples/src/bin/dyldcachedump.rs +++ b/crates/examples/src/bin/dyldcachedump.rs @@ -22,6 +22,7 @@ fn main() { continue; } }; + let subcache_files = open_subcaches_if_exist(&file_path); let file = match unsafe { memmap2::Mmap::map(&file) } { Ok(mmap) => mmap, Err(err) => { @@ -29,7 +30,26 @@ fn main() { continue; } }; - let cache = match DyldCache::::parse(&*file) { + let subcache_files: Option> = subcache_files + .into_iter() + .map( + |subcache_file| match unsafe { memmap2::Mmap::map(&subcache_file) } { + Ok(mmap) => Some(mmap), + Err(err) => { + eprintln!("Failed to map file '{}': {}", file_path, err); + None + } + }, + ) + .collect(); + let subcache_files: Vec<&[u8]> = match &subcache_files { + Some(subcache_files) => subcache_files + .iter() + .map(|subcache_file| &**subcache_file) + .collect(), + None => continue, + }; + let cache = match DyldCache::::parse(&*file, &subcache_files) { Ok(cache) => cache, Err(err) => { println!( @@ -48,3 +68,23 @@ fn main() { } } } + +// If the file is a dyld shared cache, and we're on macOS 12 or later, +// then there will be one or more "subcache" files next to this file, +// with the names filename.1, filename.2, ..., filename.symbols. +fn open_subcaches_if_exist(path: &str) -> Vec { + let mut files = Vec::new(); + for i in 1.. 
{ + let subcache_path = format!("{}.{}", path, i); + match fs::File::open(&subcache_path) { + Ok(subcache_file) => files.push(subcache_file), + Err(_) => break, + }; + } + let symbols_subcache_path = format!("{}.symbols", path); + if let Ok(subcache_file) = fs::File::open(&symbols_subcache_path) { + files.push(subcache_file); + }; + println!("Found {} subcache files", files.len()); + files +} diff --git a/crates/examples/src/bin/objdump.rs b/crates/examples/src/bin/objdump.rs index c7f989b2..18cc48ae 100644 --- a/crates/examples/src/bin/objdump.rs +++ b/crates/examples/src/bin/objdump.rs @@ -18,6 +18,7 @@ fn main() { process::exit(1); } }; + let extra_files = open_subcaches_if_exist(&file_path); let file = match unsafe { memmap2::Mmap::map(&file) } { Ok(mmap) => mmap, Err(err) => { @@ -25,8 +26,51 @@ fn main() { process::exit(1); } }; + let extra_files: Vec<_> = extra_files + .into_iter() + .map( + |subcache_file| match unsafe { memmap2::Mmap::map(&subcache_file) } { + Ok(mmap) => mmap, + Err(err) => { + eprintln!("Failed to map file '{}': {}", file_path, err,); + process::exit(1); + } + }, + ) + .collect(); + let extra_file_data: Vec<&[u8]> = extra_files.iter().map(|f| &**f).collect(); let stdout = io::stdout(); let stderr = io::stderr(); - objdump::print(&mut stdout.lock(), &mut stderr.lock(), &*file, member_names).unwrap(); + objdump::print( + &mut stdout.lock(), + &mut stderr.lock(), + &*file, + &extra_file_data, + member_names, + ) + .unwrap(); +} + +// If the file is a dyld shared cache, and we're on macOS 12 or later, +// then there will be one or more "subcache" files next to this file, +// with the names filename.1, filename.2 etc. +// Read those files now, if they exist, even if we don't know that +// we're dealing with a dyld shared cache. By the time we know what +// we're dealing with, it's too late to read more files. +fn open_subcaches_if_exist(path: &str) -> Vec { + let mut files = Vec::new(); + for i in 1.. 
{ + let subcache_path = format!("{}.{}", path, i); + match fs::File::open(&subcache_path) { + Ok(subcache_file) => files.push(subcache_file), + Err(_) => break, + }; + } + let symbols_subcache_path = format!("{}.symbols", path); + if let Ok(subcache_file) = fs::File::open(&symbols_subcache_path) { + files.push(subcache_file); + }; + println!("have {} extra files", files.len()); + files } diff --git a/crates/examples/src/objdump.rs b/crates/examples/src/objdump.rs index e01b88eb..1dd672b6 100644 --- a/crates/examples/src/objdump.rs +++ b/crates/examples/src/objdump.rs @@ -7,6 +7,7 @@ pub fn print( w: &mut W, e: &mut E, file: &[u8], + extra_files: &[&[u8]], member_names: Vec, ) -> Result<()> { let mut member_names: Vec<_> = member_names.into_iter().map(|name| (name, false)).collect(); @@ -47,7 +48,7 @@ pub fn print( Err(err) => writeln!(e, "Failed to parse Fat 64 data: {}", err)?, } } - } else if let Ok(cache) = DyldCache::::parse(&*file) { + } else if let Ok(cache) = DyldCache::::parse(&*file, extra_files) { writeln!(w, "Format: dyld cache {:?}-endian", cache.endianness())?; writeln!(w, "Architecture: {:?}", cache.architecture())?; for image in cache.images() { diff --git a/crates/examples/tests/testfiles.rs b/crates/examples/tests/testfiles.rs index 6fe22ee0..8e854571 100644 --- a/crates/examples/tests/testfiles.rs +++ b/crates/examples/tests/testfiles.rs @@ -28,7 +28,7 @@ fn testfiles() { println!("File {}", path); let data = fs::read(&path).unwrap(); fail |= testfile(path, &data, "objdump", |mut out, mut err, data| { - objdump::print(&mut out, &mut err, data, vec![]).unwrap() + objdump::print(&mut out, &mut err, data, &[], vec![]).unwrap() }); fail |= testfile(path, &data, "readobj", readobj::print); println!(); diff --git a/src/macho.rs b/src/macho.rs index 9d81f7d6..86a5bf62 100644 --- a/src/macho.rs +++ b/src/macho.rs @@ -284,26 +284,67 @@ pub const VM_PROT_EXECUTE: u32 = 0x04; // Definitions from 
https://opensource.apple.com/source/dyld/dyld-210.2.3/launch-cache/dyld_cache_format.h.auto.html -/// The dyld cache header, containing only the fields which are present -/// in all versions of dyld caches (dyld-95.3 and up). -/// Many more fields exist in later dyld versions, but we currently do -/// not need to parse those. +/// The dyld cache header. /// Corresponds to struct dyld_cache_header from dyld_cache_format.h. +/// This header has grown over time. Only the fields up to and including dyld_base_address +/// are guaranteed to be present. For all other fields, check the header size before +/// accessing the field. The header size is stored in mapping_offset; the mappings start +/// right after the header. #[derive(Debug, Clone, Copy)] #[repr(C)] pub struct DyldCacheHeader { /// e.g. "dyld_v0 i386" pub magic: [u8; 16], /// file offset to first dyld_cache_mapping_info - pub mapping_offset: U32, + pub mapping_offset: U32, // offset: 0x10 /// number of dyld_cache_mapping_info entries - pub mapping_count: U32, + pub mapping_count: U32, // offset: 0x14 /// file offset to first dyld_cache_image_info - pub images_offset: U32, + pub images_offset: U32, // offset: 0x18 /// number of dyld_cache_image_info entries - pub images_count: U32, + pub images_count: U32, // offset: 0x1c /// base address of dyld when cache was built - pub dyld_base_address: U64, + pub dyld_base_address: U64, // offset: 0x20 + /// + reserved1: [u8; 32], // offset: 0x28 + /// file offset of where local symbols are stored + pub local_symbols_offset: U64, // offset: 0x48 + /// size of local symbols information + pub local_symbols_size: U64, // offset: 0x50 + /// unique value for each shared cache file + pub uuid: [u8; 16], // offset: 0x58 + /// + reserved2: [u8; 32], // offset: 0x68 + /// + reserved3: [u8; 32], // offset: 0x88 + /// + reserved4: [u8; 32], // offset: 0xa8 + /// + reserved5: [u8; 32], // offset: 0xc8 + /// + reserved6: [u8; 32], // offset: 0xe8 + /// + reserved7: [u8; 32], // 
offset: 0x108 + /// + reserved8: [u8; 32], // offset: 0x128 + /// + reserved9: [u8; 32], // offset: 0x148 + /// + reserved10: [u8; 32], // offset: 0x168 + /// file offset to first dyld_subcache_info + pub subcaches_offset: U32, // offset: 0x188 + /// number of dyld_subcache_info entries + pub subcaches_count: U32, // offset: 0x18c + /// the UUID of the .symbols subcache + pub symbols_subcache_uuid: [u8; 16], // offset: 0x190 + /// + reserved11: [u8; 32], // offset: 0x1a0 + /// file offset to first dyld_cache_image_info + /// Use this instead of images_offset if mapping_offset is at least 0x1c4. + pub images_across_all_subcaches_offset: U32, // offset: 0x1c0 + /// number of dyld_cache_image_info entries + /// Use this instead of images_count if mapping_offset is at least 0x1c4. + pub images_across_all_subcaches_count: U32, // offset: 0x1c4 } /// Corresponds to struct dyld_cache_mapping_info from dyld_cache_format.h. @@ -338,6 +379,17 @@ pub struct DyldCacheImageInfo { pub pad: U32, } +/// Corresponds to a struct whose source code has not been published as of Nov 2021. +/// Added in the dyld cache version which shipped with macOS 12 / iOS 15. +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub struct DyldSubCacheInfo { + /// The UUID of this subcache. + pub uuid: [u8; 16], + /// The size of this subcache plus all previous subcaches. + pub cumulative_size: U64, +} + // Definitions from "/usr/include/mach-o/loader.h". /* @@ -3199,6 +3251,7 @@ unsafe_impl_endian_pod!( DyldCacheHeader, DyldCacheMappingInfo, DyldCacheImageInfo, + DyldSubCacheInfo, MachHeader32, MachHeader64, LoadCommand, diff --git a/src/read/any.rs b/src/read/any.rs index 940cf278..ce99bc31 100644 --- a/src/read/any.rs +++ b/src/read/any.rs @@ -20,7 +20,7 @@ use crate::read::{ SymbolMapName, SymbolScope, SymbolSection, }; #[allow(unused_imports)] -use crate::Endianness; +use crate::{AddressSize, Endian, Endianness}; /// Evaluate an expression on the contents of a file format enum. 
/// @@ -220,23 +220,21 @@ impl<'data, R: ReadRef<'data>> File<'data, R> { Ok(File { inner }) } - /// Parse the raw file data at an arbitrary offset inside the input data. - /// - /// Currently, this is only supported for Mach-O images. - /// This can be used for parsing Mach-O images inside the dyld shared cache, - /// where multiple images, located at different offsets, share the same address - /// space. - pub fn parse_at(data: R, offset: u64) -> Result { - let _inner = match FileKind::parse_at(data, offset)? { - #[cfg(feature = "macho")] - FileKind::MachO32 => FileInternal::MachO32(macho::MachOFile32::parse_at(data, offset)?), - #[cfg(feature = "macho")] - FileKind::MachO64 => FileInternal::MachO64(macho::MachOFile64::parse_at(data, offset)?), - #[allow(unreachable_patterns)] + /// Parse a Mach-O image from the dyld shared cache. + #[cfg(feature = "macho")] + pub fn parse_dyld_cache_image<'cache, E: Endian>( + image: &macho::DyldCacheImage<'data, 'cache, E, R>, + ) -> Result { + let inner = match image.cache.architecture().address_size() { + Some(AddressSize::U64) => { + FileInternal::MachO64(macho::MachOFile64::parse_dyld_cache_image(image)?) + } + Some(AddressSize::U32) => { + FileInternal::MachO32(macho::MachOFile32::parse_dyld_cache_image(image)?) + } _ => return Err(Error("Unsupported file format")), }; - #[allow(unreachable_code)] - Ok(File { inner: _inner }) + Ok(File { inner }) } /// Return the file format. 
diff --git a/src/read/macho/dyld_cache.rs b/src/read/macho/dyld_cache.rs index ee758ce0..52a2f7c0 100644 --- a/src/read/macho/dyld_cache.rs +++ b/src/read/macho/dyld_cache.rs @@ -1,3 +1,4 @@ +use alloc::vec::Vec; use core::slice; use crate::read::{Error, File, ReadError, ReadRef, Result}; @@ -12,26 +13,89 @@ where { endian: E, data: R, + subcaches: Vec>, + symbols_subcache: Option>, header: &'data macho::DyldCacheHeader, mappings: &'data [macho::DyldCacheMappingInfo], images: &'data [macho::DyldCacheImageInfo], arch: Architecture, } +/// Information about a subcache. +#[derive(Debug)] +pub struct DyldSubCache<'data, E = Endianness, R = &'data [u8]> +where + E: Endian, + R: ReadRef<'data>, +{ + data: R, + mappings: &'data [macho::DyldCacheMappingInfo], +} + +// This is the offset of the images_across_all_subcaches_count field. +const MIN_HEADER_SIZE_SUBCACHES: u32 = 0x1c4; + impl<'data, E, R> DyldCache<'data, E, R> where E: Endian, R: ReadRef<'data>, { /// Parse the raw dyld shared cache data. - pub fn parse(data: R) -> Result { + /// For shared caches from macOS 12 / iOS 15 and above, the subcache files need to be + /// supplied as well, in the correct order, with the .symbols subcache last (if present). + /// For example, data would be the data for dyld_shared_cache_x86_64, + /// and subcache_data would be the data for [dyld_shared_cache_x86_64.1, dyld_shared_cache_x86_64.2, ...] + pub fn parse(data: R, subcache_data: &[R]) -> Result { let header = macho::DyldCacheHeader::parse(data)?; let (arch, endian) = header.parse_magic()?; let mappings = header.mappings(endian, data)?; + + let symbols_subcache_uuid = header.symbols_subcache_uuid(endian); + let subcaches_info = header.subcaches(endian, data)?.unwrap_or(&[]); + + if subcache_data.len() != subcaches_info.len() + symbols_subcache_uuid.is_some() as usize { + return Err(Error("Incorrect number of SubCaches")); + } + + // Split out the .symbols subcache data from the other subcaches. 
+ let (symbols_subcache_data_and_uuid, subcache_data) = + if let Some(symbols_uuid) = symbols_subcache_uuid { + let (sym_data, rest_data) = subcache_data.split_last().unwrap(); + (Some((*sym_data, symbols_uuid)), rest_data) + } else { + (None, subcache_data) + }; + + // Read the regular SubCaches (.1, .2, ...), if present. + let mut subcaches = Vec::new(); + for (&data, info) in subcache_data.iter().zip(subcaches_info.iter()) { + let sc_header = macho::DyldCacheHeader::::parse(data)?; + if sc_header.uuid != info.uuid { + return Err(Error("Unexpected SubCache UUID")); + } + let mappings = sc_header.mappings(endian, data)?; + subcaches.push(DyldSubCache { data, mappings }); + } + + // Read the .symbols SubCache, if present. + let symbols_subcache = match symbols_subcache_data_and_uuid { + Some((data, uuid)) => { + let sc_header = macho::DyldCacheHeader::::parse(data)?; + if sc_header.uuid != uuid { + return Err(Error("Unexpected .symbols SubCache UUID")); + } + let mappings = sc_header.mappings(endian, data)?; + Some(DyldSubCache { data, mappings }) + } + None => None, + }; + let images = header.images(endian, data)?; Ok(DyldCache { endian, data, + subcaches, + symbols_subcache, header, mappings, images, @@ -66,6 +130,22 @@ where iter: self.images.iter(), } } + + /// Find the address in a mapping and return the cache or subcache data it was found in, + /// together with the translated file offset. + pub fn data_and_offset_for_address(&self, address: u64) -> Option<(R, u64)> { + if let Some(file_offset) = address_to_file_offset(address, self.endian, self.mappings) { + return Some((self.data, file_offset)); + } + for subcache in &self.subcaches { + if let Some(file_offset) = + address_to_file_offset(address, self.endian, subcache.mappings) + { + return Some((subcache.data, file_offset)); + } + } + None + } } /// An iterator over all the images (dylibs) in the dyld shared cache. 
@@ -84,14 +164,12 @@ where E: Endian, R: ReadRef<'data>, { - type Item = DyldCacheImage<'data, E, R>; + type Item = DyldCacheImage<'data, 'cache, E, R>; - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { let image_info = self.iter.next()?; Some(DyldCacheImage { - endian: self.cache.endian, - data: self.cache.data, - mappings: self.cache.mappings, + cache: self.cache, image_info, }) } @@ -99,38 +177,39 @@ where /// One image (dylib) from inside the dyld shared cache. #[derive(Debug)] -pub struct DyldCacheImage<'data, E = Endianness, R = &'data [u8]> +pub struct DyldCacheImage<'data, 'cache, E = Endianness, R = &'data [u8]> where E: Endian, R: ReadRef<'data>, { - endian: E, - data: R, - mappings: &'data [macho::DyldCacheMappingInfo], + pub(crate) cache: &'cache DyldCache<'data, E, R>, image_info: &'data macho::DyldCacheImageInfo, } -impl<'data, E, R> DyldCacheImage<'data, E, R> +impl<'data, 'cache, E, R> DyldCacheImage<'data, 'cache, E, R> where E: Endian, R: ReadRef<'data>, { /// The file system path of this image. pub fn path(&self) -> Result<&'data str> { - let path = self.image_info.path(self.endian, self.data)?; + let path = self.image_info.path(self.cache.endian, self.cache.data)?; // The path should always be ascii, so from_utf8 should alway succeed. let path = core::str::from_utf8(path).map_err(|_| Error("Path string not valid utf-8"))?; Ok(path) } /// The offset in the dyld cache file where this image starts. - pub fn file_offset(&self) -> Result { - self.image_info.file_offset(self.endian, self.mappings) + pub fn image_data_and_offset(&self) -> Result<(R, u64)> { + let address = self.image_info.address.get(self.cache.endian); + self.cache + .data_and_offset_for_address(address) + .ok_or(Error("Address not found in any mapping")) } /// Parse this image into an Object. pub fn parse_object(&self) -> Result> { - File::parse_at(self.data, self.file_offset()?) 
+ File::parse_dyld_cache_image(self) } } @@ -175,17 +254,55 @@ impl macho::DyldCacheHeader { .read_error("Invalid dyld cache mapping size or alignment") } + /// Return the information about subcaches, if present. + pub fn subcaches<'data, R: ReadRef<'data>>( + &self, + endian: E, + data: R, + ) -> Result]>> { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + let subcaches = data + .read_slice_at::>( + self.subcaches_offset.get(endian).into(), + self.subcaches_count.get(endian) as usize, + ) + .read_error("Invalid dyld subcaches size or alignment")?; + Ok(Some(subcaches)) + } else { + Ok(None) + } + } + + /// Return the UUID for the .symbols subcache, if present. + pub fn symbols_subcache_uuid(&self, endian: E) -> Option<[u8; 16]> { + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + let uuid = self.symbols_subcache_uuid; + if uuid != [0; 16] { + return Some(uuid); + } + } + None + } + /// Return the image information table. pub fn images<'data, R: ReadRef<'data>>( &self, endian: E, data: R, ) -> Result<&'data [macho::DyldCacheImageInfo]> { - data.read_slice_at::>( - self.images_offset.get(endian).into(), - self.images_count.get(endian) as usize, - ) - .read_error("Invalid dyld cache image size or alignment") + if self.mapping_offset.get(endian) >= MIN_HEADER_SIZE_SUBCACHES { + data.read_slice_at::>( + self.images_across_all_subcaches_offset.get(endian).into(), + self.images_across_all_subcaches_count.get(endian) as usize, + ) + .read_error("Invalid dyld cache image size or alignment") + } else { + data.read_slice_at::>( + self.images_offset.get(endian).into(), + self.images_count.get(endian) as usize, + ) + .read_error("Invalid dyld cache image size or alignment") + } } } @@ -205,14 +322,26 @@ impl macho::DyldCacheImageInfo { mappings: &[macho::DyldCacheMappingInfo], ) -> Result { let address = self.address.get(endian); - for mapping in mappings { - let mapping_address = mapping.address.get(endian); - if address >= 
mapping_address - && address < mapping_address.wrapping_add(mapping.size.get(endian)) - { - return Ok(address - mapping_address + mapping.file_offset.get(endian)); - } + match address_to_file_offset(address, endian, mappings) { + Some(file_offset) => Ok(file_offset), + None => Err(Error("Invalid dyld cache image address")), + } + } +} + +/// Find the file offset of the image by looking up its address in the mappings. +pub fn address_to_file_offset( + address: u64, + endian: E, + mappings: &[macho::DyldCacheMappingInfo], +) -> Option { + for mapping in mappings { + let mapping_address = mapping.address.get(endian); + if address >= mapping_address + && address < mapping_address.wrapping_add(mapping.size.get(endian)) + { + return Some(address - mapping_address + mapping.file_offset.get(endian)); } - Err(Error("Invalid dyld cache image address")) } + None } diff --git a/src/read/macho/file.rs b/src/read/macho/file.rs index 0d4961b1..559f001a 100644 --- a/src/read/macho/file.rs +++ b/src/read/macho/file.rs @@ -10,9 +10,9 @@ use crate::read::{ use crate::{endian, macho, BigEndian, ByteString, Endian, Endianness, Pod}; use super::{ - LoadCommandIterator, MachOSection, MachOSectionInternal, MachOSectionIterator, MachOSegment, - MachOSegmentIterator, MachOSymbol, MachOSymbolIterator, MachOSymbolTable, Nlist, Section, - Segment, SymbolTable, + DyldCacheImage, LoadCommandIterator, MachOSection, MachOSectionInternal, MachOSectionIterator, + MachOSegment, MachOSegmentInternal, MachOSegmentIterator, MachOSymbol, MachOSymbolIterator, + MachOSymbolTable, Nlist, Section, Segment, SymbolTable, }; /// A 32-bit Mach-O object file. @@ -35,6 +35,7 @@ where pub(super) data: R, pub(super) header_offset: u64, pub(super) header: &'data Mach, + pub(super) segments: Vec>, pub(super) sections: Vec>, pub(super) symbols: SymbolTable<'data, Mach, R>, } @@ -46,38 +47,95 @@ where { /// Parse the raw Mach-O file data. 
pub fn parse(data: R) -> Result { - Self::parse_at(data, 0) + let header = Mach::parse(data, 0)?; + let endian = header.endian()?; + + // Build a list of segments and sections to make some operations more efficient. + let mut segments = Vec::new(); + let mut sections = Vec::new(); + let mut symbols = SymbolTable::default(); + if let Ok(mut commands) = header.load_commands(endian, data, 0) { + while let Ok(Some(command)) = commands.next() { + if let Some((segment, section_data)) = Mach::Segment::from_command(command)? { + let segment_index = segments.len(); + segments.push(MachOSegmentInternal { segment, data }); + for section in segment.sections(endian, section_data)? { + let index = SectionIndex(sections.len() + 1); + sections.push(MachOSectionInternal::parse(index, segment_index, section)); + } + } else if let Some(symtab) = command.symtab()? { + symbols = symtab.symbols(endian, data)?; + } + } + } + + Ok(MachOFile { + endian, + data, + header_offset: 0, + header, + segments, + sections, + symbols, + }) } - /// Parse the raw Mach-O file data at an arbitrary offset inside the input data. - /// This can be used for parsing Mach-O images inside the dyld shared cache, - /// where multiple images, located at different offsets, share the same address - /// space. - pub fn parse_at(data: R, header_offset: u64) -> Result { + /// Parse the Mach-O file for the given image from the dyld shared cache. + /// This will read different sections from different subcaches, if necessary. + pub fn parse_dyld_cache_image<'cache, E: Endian>( + image: &DyldCacheImage<'data, 'cache, E, R>, + ) -> Result { + let (data, header_offset) = image.image_data_and_offset()?; let header = Mach::parse(data, header_offset)?; let endian = header.endian()?; - let mut symbols = SymbolTable::default(); // Build a list of sections to make some operations more efficient. + // Also build a list of segments, because we need to remember which ReadRef + // to read each section's data from. 
Only the DyldCache knows this information, + // and we won't have access to it once we've exited this function. + let mut segments = Vec::new(); let mut sections = Vec::new(); + let mut linkedit_data: Option = None; + let mut symtab = None; if let Ok(mut commands) = header.load_commands(endian, data, header_offset) { while let Ok(Some(command)) = commands.next() { if let Some((segment, section_data)) = Mach::Segment::from_command(command)? { + // Each segment can be stored in a different subcache. Get the segment's + // address and look it up in the cache mappings, to find the correct cache data. + let addr = segment.vmaddr(endian).into(); + let (data, _offset) = image + .cache + .data_and_offset_for_address(addr) + .read_error("Could not find segment data in dyld shared cache")?; + if segment.name() == macho::SEG_LINKEDIT.as_bytes() { + linkedit_data = Some(data); + } + let segment_index = segments.len(); + segments.push(MachOSegmentInternal { segment, data }); + for section in segment.sections(endian, section_data)? { let index = SectionIndex(sections.len() + 1); - sections.push(MachOSectionInternal::parse(index, section)); + sections.push(MachOSectionInternal::parse(index, segment_index, section)); } - } else if let Some(symtab) = command.symtab()? { - symbols = symtab.symbols(endian, data)?; + } else if let Some(st) = command.symtab()? { + symtab = Some(st); } } } + // The symbols are found in the __LINKEDIT segment, so make sure to read them from the + // correct subcache. 
+ let symbols = match (symtab, linkedit_data) { + (Some(symtab), Some(linkedit_data)) => symtab.symbols(endian, linkedit_data)?, + _ => SymbolTable::default(), + }; + Ok(MachOFile { endian, data, header_offset, header, + segments, sections, symbols, }) @@ -95,6 +153,15 @@ where .and_then(|index| self.sections.get(index)) .read_error("Invalid Mach-O section index") } + + pub(super) fn segment_internal( + &self, + index: usize, + ) -> Result<&MachOSegmentInternal<'data, Mach, R>> { + self.segments + .get(index) + .read_error("Invalid Mach-O segment index") + } } impl<'data, Mach, R> read::private::Sealed for MachOFile<'data, Mach, R> @@ -155,11 +222,7 @@ where fn segments(&'file self) -> MachOSegmentIterator<'data, 'file, Mach, R> { MachOSegmentIterator { file: self, - commands: self - .header - .load_commands(self.endian, self.data, self.header_offset) - .ok() - .unwrap_or_else(Default::default), + iter: self.segments.iter(), } } diff --git a/src/read/macho/section.rs b/src/read/macho/section.rs index 3a5a22eb..9e71aa8f 100644 --- a/src/read/macho/section.rs +++ b/src/read/macho/section.rs @@ -80,9 +80,11 @@ where R: ReadRef<'data>, { fn bytes(&self) -> Result<&'data [u8]> { + let segment_index = self.internal.segment_index; + let segment = self.file.segment_internal(segment_index)?; self.internal .section - .data(self.file.endian, self.file.data) + .data(self.file.endian, segment.data) .read_error("Invalid Mach-O section size or offset") } } @@ -202,12 +204,17 @@ where #[derive(Debug, Clone, Copy)] pub(super) struct MachOSectionInternal<'data, Mach: MachHeader> { pub index: SectionIndex, + pub segment_index: usize, pub kind: SectionKind, pub section: &'data Mach::Section, } impl<'data, Mach: MachHeader> MachOSectionInternal<'data, Mach> { - pub(super) fn parse(index: SectionIndex, section: &'data Mach::Section) -> Self { + pub(super) fn parse( + index: SectionIndex, + segment_index: usize, + section: &'data Mach::Section, + ) -> Self { // TODO: we don't validate 
flags, should we? let kind = match (section.segment_name(), section.name()) { (b"__TEXT", b"__text") => SectionKind::Text, @@ -230,6 +237,7 @@ impl<'data, Mach: MachHeader> MachOSectionInternal<'data, Mach> { }; MachOSectionInternal { index, + segment_index, kind, section, } diff --git a/src/read/macho/segment.rs b/src/read/macho/segment.rs index 3c2d9649..3a09379d 100644 --- a/src/read/macho/segment.rs +++ b/src/read/macho/segment.rs @@ -1,12 +1,12 @@ use core::fmt::Debug; -use core::{result, str}; +use core::{result, slice, str}; use crate::endian::{self, Endianness}; use crate::macho; use crate::pod::Pod; use crate::read::{self, ObjectSegment, ReadError, ReadRef, Result}; -use super::{LoadCommandData, LoadCommandIterator, MachHeader, MachOFile, Section}; +use super::{LoadCommandData, MachHeader, MachOFile, Section}; /// An iterator over the segments of a `MachOFile32`. pub type MachOSegmentIterator32<'data, 'file, Endian = Endianness, R = &'data [u8]> = @@ -24,7 +24,7 @@ where R: ReadRef<'data>, { pub(super) file: &'file MachOFile<'data, Mach, R>, - pub(super) commands: LoadCommandIterator<'data, Mach::Endian>, + pub(super) iter: slice::Iter<'file, MachOSegmentInternal<'data, Mach, R>>, } impl<'data, 'file, Mach, R> Iterator for MachOSegmentIterator<'data, 'file, Mach, R> @@ -35,15 +35,10 @@ where type Item = MachOSegment<'data, 'file, Mach, R>; fn next(&mut self) -> Option { - loop { - let command = self.commands.next().ok()??; - if let Ok(Some((segment, _))) = Mach::Segment::from_command(command) { - return Some(MachOSegment { - file: self.file, - segment, - }); - } - } + self.iter.next().map(|internal| MachOSegment { + file: self.file, + internal, + }) } } @@ -63,7 +58,7 @@ where R: ReadRef<'data>, { file: &'file MachOFile<'data, Mach, R>, - segment: &'data Mach::Segment, + internal: &'file MachOSegmentInternal<'data, Mach, R>, } impl<'data, 'file, Mach, R> MachOSegment<'data, 'file, Mach, R> @@ -72,7 +67,8 @@ where R: ReadRef<'data>, { fn bytes(&self) -> 
Result<&'data [u8]> { - self.segment + self.internal + .segment .data(self.file.endian, self.file.data) .read_error("Invalid Mach-O segment size or offset") } @@ -92,12 +88,12 @@ where { #[inline] fn address(&self) -> u64 { - self.segment.vmaddr(self.file.endian).into() + self.internal.segment.vmaddr(self.file.endian).into() } #[inline] fn size(&self) -> u64 { - self.segment.vmsize(self.file.endian).into() + self.internal.segment.vmsize(self.file.endian).into() } #[inline] @@ -108,7 +104,7 @@ where #[inline] fn file_range(&self) -> (u64, u64) { - self.segment.file_range(self.file.endian) + self.internal.segment.file_range(self.file.endian) } fn data(&self) -> Result<&'data [u8]> { @@ -126,19 +122,25 @@ where #[inline] fn name_bytes(&self) -> Result> { - Ok(Some(self.segment.name())) + Ok(Some(self.internal.segment.name())) } #[inline] fn name(&self) -> Result> { Ok(Some( - str::from_utf8(self.segment.name()) + str::from_utf8(self.internal.segment.name()) .ok() .read_error("Non UTF-8 Mach-O segment name")?, )) } } +#[derive(Debug, Clone, Copy)] +pub(super) struct MachOSegmentInternal<'data, Mach: MachHeader, R: ReadRef<'data>> { + pub data: R, + pub segment: &'data Mach::Segment, +} + /// A trait for generic access to `SegmentCommand32` and `SegmentCommand64`. #[allow(missing_docs)] pub trait Segment: Debug + Pod {