Skip to content

Commit

Permalink
Add support for split dyld shared cache. (gimli-rs#398)
Browse files Browse the repository at this point in the history
Fixes gimli-rs#358.

This adds support for the dyld cache format that is used on macOS 12 and
iOS 15. The cache is split over multiple files, with a "root" cache
and one or more subcaches, for example:

```
/System/Library/dyld/dyld_shared_cache_x86_64
/System/Library/dyld/dyld_shared_cache_x86_64.1
/System/Library/dyld/dyld_shared_cache_x86_64.2
/System/Library/dyld/dyld_shared_cache_x86_64.3
```

Additionally, on iOS, there is a separate .symbols subcache, which
contains local symbols.

Each file has a set of mappings. For each image in the cache, the
segments of that image can be distributed over multiple files: For
example, on macOS 12.0.1, the image for libsystem_malloc.dylib for the
arm64e architecture has its __TEXT segment in the root cache and the
__LINKEDIT segment in the .1 subcache - there's a single __LINKEDIT
segment which is shared between all images across both files. The
remaining libsystem_malloc.dylib segments are in the same file as the
__TEXT segment.

The DyldCache API now requires the data for all subcaches to be supplied
to the constructor.

The parse_at methods have been removed and replaced with a
parse_dyld_cache_image method.

With this patch, the following command outputs correct symbols for
libsystem_malloc.dylib:

```
cargo run --release --bin objdump -- /System/Library/dyld/dyld_shared_cache_arm64e /usr/lib/system/libsystem_malloc.dylib
```

Support for local symbols is not implemented. But, as a first step,
DyldCache::parse requires the .symbols subcache to be supplied (if the
root cache expects one to be present) and checks that its UUID is correct.
MachOFile doesn't do anything with ilocalsym and nlocalsym yet, and we
don't yet have the struct definitions for dyld_cache_local_symbols_info
and dyld_cache_local_symbols_entry.
  • Loading branch information
mstange authored Nov 27, 2021
1 parent 81ae181 commit 522b160
Show file tree
Hide file tree
Showing 10 changed files with 433 additions and 97 deletions.
42 changes: 41 additions & 1 deletion crates/examples/src/bin/dyldcachedump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,34 @@ fn main() {
continue;
}
};
let subcache_files = open_subcaches_if_exist(&file_path);
let file = match unsafe { memmap2::Mmap::map(&file) } {
Ok(mmap) => mmap,
Err(err) => {
println!("Failed to map file '{}': {}", file_path, err,);
continue;
}
};
let cache = match DyldCache::<Endianness>::parse(&*file) {
let subcache_files: Option<Vec<_>> = subcache_files
.into_iter()
.map(
|subcache_file| match unsafe { memmap2::Mmap::map(&subcache_file) } {
Ok(mmap) => Some(mmap),
Err(err) => {
eprintln!("Failed to map file '{}': {}", file_path, err);
None
}
},
)
.collect();
let subcache_files: Vec<&[u8]> = match &subcache_files {
Some(subcache_files) => subcache_files
.iter()
.map(|subcache_file| &**subcache_file)
.collect(),
None => continue,
};
let cache = match DyldCache::<Endianness>::parse(&*file, &subcache_files) {
Ok(cache) => cache,
Err(err) => {
println!(
Expand All @@ -48,3 +68,23 @@ fn main() {
}
}
}

// Opens the "subcache" files that may sit next to a dyld shared cache.
//
// On macOS 12 or later a dyld shared cache is split over several files:
// the root cache at `path`, numbered subcaches `path.1`, `path.2`, ...,
// and optionally `path.symbols`.
//
// Numbered subcaches are probed in ascending order and probing stops at the
// first name that cannot be opened (for whatever reason). `path.symbols` is
// appended last if it can be opened. The number of files found is printed
// to stdout, and the opened files are returned in probing order.
fn open_subcaches_if_exist(path: &str) -> Vec<fs::File> {
    let mut subcaches = Vec::new();

    // Walk path.1, path.2, ... until the first open failure.
    let mut index = 1;
    while let Ok(subcache) = fs::File::open(format!("{}.{}", path, index)) {
        subcaches.push(subcache);
        index += 1;
    }

    // The .symbols subcache is optional; an Option extends by zero or one item.
    subcaches.extend(fs::File::open(format!("{}.symbols", path)).ok());

    println!("Found {} subcache files", subcaches.len());
    subcaches
}
45 changes: 44 additions & 1 deletion crates/examples/src/bin/objdump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,58 @@ fn main() {
process::exit(1);
}
};
let extra_files = open_subcaches_if_exist(&file_path);
let file = match unsafe { memmap2::Mmap::map(&file) } {
Ok(mmap) => mmap,
Err(err) => {
eprintln!("Failed to map file '{}': {}", file_path, err,);
process::exit(1);
}
};
let extra_files: Vec<_> = extra_files
.into_iter()
.map(
|subcache_file| match unsafe { memmap2::Mmap::map(&subcache_file) } {
Ok(mmap) => mmap,
Err(err) => {
eprintln!("Failed to map file '{}': {}", file_path, err,);
process::exit(1);
}
},
)
.collect();
let extra_file_data: Vec<&[u8]> = extra_files.iter().map(|f| &**f).collect();

let stdout = io::stdout();
let stderr = io::stderr();
objdump::print(&mut stdout.lock(), &mut stderr.lock(), &*file, member_names).unwrap();
objdump::print(
&mut stdout.lock(),
&mut stderr.lock(),
&*file,
&extra_file_data,
member_names,
)
.unwrap();
}

// Opens the "subcache" files that may sit next to the file at `path`.
//
// If the file is a dyld shared cache from macOS 12 or later, it is
// accompanied by numbered subcaches `path.1`, `path.2`, ... and optionally
// by `path.symbols`.
//
// These are opened up front, before it is known whether the input really is
// a dyld shared cache: by the time the format has been identified it is too
// late to go back and open more files.
//
// Numbered subcaches are probed in ascending order, stopping at the first
// name that cannot be opened; `path.symbols` is appended last if it opens.
fn open_subcaches_if_exist(path: &str) -> Vec<fs::File> {
    // Lazily probe path.1, path.2, ... and keep the leading run of successes.
    let mut opened: Vec<fs::File> = (1..)
        .map(|index| fs::File::open(format!("{}.{}", path, index)))
        .take_while(Result::is_ok)
        .filter_map(Result::ok)
        .collect();

    // Only after the numbered probe has finished, try the optional .symbols file.
    opened.extend(fs::File::open(format!("{}.symbols", path)).ok());
    opened
}
3 changes: 2 additions & 1 deletion crates/examples/src/objdump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ pub fn print<W: Write, E: Write>(
w: &mut W,
e: &mut E,
file: &[u8],
extra_files: &[&[u8]],
member_names: Vec<String>,
) -> Result<()> {
let mut member_names: Vec<_> = member_names.into_iter().map(|name| (name, false)).collect();
Expand Down Expand Up @@ -47,7 +48,7 @@ pub fn print<W: Write, E: Write>(
Err(err) => writeln!(e, "Failed to parse Fat 64 data: {}", err)?,
}
}
} else if let Ok(cache) = DyldCache::<Endianness>::parse(&*file) {
} else if let Ok(cache) = DyldCache::<Endianness>::parse(&*file, extra_files) {
writeln!(w, "Format: dyld cache {:?}-endian", cache.endianness())?;
writeln!(w, "Architecture: {:?}", cache.architecture())?;
for image in cache.images() {
Expand Down
2 changes: 1 addition & 1 deletion crates/examples/tests/testfiles.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ fn testfiles() {
println!("File {}", path);
let data = fs::read(&path).unwrap();
fail |= testfile(path, &data, "objdump", |mut out, mut err, data| {
objdump::print(&mut out, &mut err, data, vec![]).unwrap()
objdump::print(&mut out, &mut err, data, &[], vec![]).unwrap()
});
fail |= testfile(path, &data, "readobj", readobj::print);
println!();
Expand Down
71 changes: 62 additions & 9 deletions src/macho.rs
Original file line number Diff line number Diff line change
Expand Up @@ -284,26 +284,67 @@ pub const VM_PROT_EXECUTE: u32 = 0x04;

// Definitions from https://opensource.apple.com/source/dyld/dyld-210.2.3/launch-cache/dyld_cache_format.h.auto.html

/// The dyld cache header, containing only the fields which are present
/// in all versions of dyld caches (dyld-95.3 and up).
/// Many more fields exist in later dyld versions, but we currently do
/// not need to parse those.
/// The dyld cache header.
/// Corresponds to struct dyld_cache_header from dyld_cache_format.h.
/// This header has grown over time. Only the fields up to and including dyld_base_address
/// are guaranteed to be present. For all other fields, check the header size before
/// accessing the field. The header size is stored in mapping_offset; the mappings start
/// right after the header.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct DyldCacheHeader<E: Endian> {
/// e.g. "dyld_v0 i386"
pub magic: [u8; 16],
/// file offset to first dyld_cache_mapping_info
pub mapping_offset: U32<E>,
pub mapping_offset: U32<E>, // offset: 0x10
/// number of dyld_cache_mapping_info entries
pub mapping_count: U32<E>,
pub mapping_count: U32<E>, // offset: 0x14
/// file offset to first dyld_cache_image_info
pub images_offset: U32<E>,
pub images_offset: U32<E>, // offset: 0x18
/// number of dyld_cache_image_info entries
pub images_count: U32<E>,
pub images_count: U32<E>, // offset: 0x1c
/// base address of dyld when cache was built
pub dyld_base_address: U64<E>,
pub dyld_base_address: U64<E>, // offset: 0x20
///
reserved1: [u8; 32], // offset: 0x28
/// file offset of where local symbols are stored
pub local_symbols_offset: U64<E>, // offset: 0x48
/// size of local symbols information
pub local_symbols_size: U64<E>, // offset: 0x50
/// unique value for each shared cache file
pub uuid: [u8; 16], // offset: 0x58
///
reserved2: [u8; 32], // offset: 0x68
///
reserved3: [u8; 32], // offset: 0x88
///
reserved4: [u8; 32], // offset: 0xa8
///
reserved5: [u8; 32], // offset: 0xc8
///
reserved6: [u8; 32], // offset: 0xe8
///
reserved7: [u8; 32], // offset: 0x108
///
reserved8: [u8; 32], // offset: 0x128
///
reserved9: [u8; 32], // offset: 0x148
///
reserved10: [u8; 32], // offset: 0x168
/// file offset to first dyld_subcache_info
pub subcaches_offset: U32<E>, // offset: 0x188
/// number of dyld_subcache_info entries
pub subcaches_count: U32<E>, // offset: 0x18c
/// the UUID of the .symbols subcache
pub symbols_subcache_uuid: [u8; 16], // offset: 0x190
///
reserved11: [u8; 32], // offset: 0x1a0
/// file offset to first dyld_cache_image_info
/// Use this instead of images_offset if mapping_offset is at least 0x1c4.
pub images_across_all_subcaches_offset: U32<E>, // offset: 0x1c0
/// number of dyld_cache_image_info entries
/// Use this instead of images_count if mapping_offset is at least 0x1c4.
pub images_across_all_subcaches_count: U32<E>, // offset: 0x1c4
}

/// Corresponds to struct dyld_cache_mapping_info from dyld_cache_format.h.
Expand Down Expand Up @@ -338,6 +379,17 @@ pub struct DyldCacheImageInfo<E: Endian> {
pub pad: U32<E>,
}

/// One entry describing a subcache of a split dyld shared cache.
///
/// Corresponds to a struct whose source code has not been published as of Nov 2021.
/// Added in the dyld cache version which shipped with macOS 12 / iOS 15.
///
/// The `subcaches_offset` and `subcaches_count` fields of `DyldCacheHeader` give the
/// file offset of the first entry and the number of entries.
// NOTE(review): `#[repr(C)]` keeps the fields in declaration order; presumably this
// mirrors the on-disk entry layout (16-byte UUID followed by a 64-bit size) — confirm
// against a real cache file, since Apple has not published the definition.
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct DyldSubCacheInfo<E: Endian> {
/// The UUID of this subcache.
pub uuid: [u8; 16],
/// The size of this subcache plus all previous subcaches.
// NOTE(review): presumably measured in bytes — TODO confirm.
pub cumulative_size: U64<E>,
}

// Definitions from "/usr/include/mach-o/loader.h".

/*
Expand Down Expand Up @@ -3199,6 +3251,7 @@ unsafe_impl_endian_pod!(
DyldCacheHeader,
DyldCacheMappingInfo,
DyldCacheImageInfo,
DyldSubCacheInfo,
MachHeader32,
MachHeader64,
LoadCommand,
Expand Down
30 changes: 14 additions & 16 deletions src/read/any.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::read::{
SymbolMapName, SymbolScope, SymbolSection,
};
#[allow(unused_imports)]
use crate::Endianness;
use crate::{AddressSize, Endian, Endianness};

/// Evaluate an expression on the contents of a file format enum.
///
Expand Down Expand Up @@ -220,23 +220,21 @@ impl<'data, R: ReadRef<'data>> File<'data, R> {
Ok(File { inner })
}

/// Parse the raw file data at an arbitrary offset inside the input data.
///
/// Currently, this is only supported for Mach-O images.
/// This can be used for parsing Mach-O images inside the dyld shared cache,
/// where multiple images, located at different offsets, share the same address
/// space.
pub fn parse_at(data: R, offset: u64) -> Result<Self> {
let _inner = match FileKind::parse_at(data, offset)? {
#[cfg(feature = "macho")]
FileKind::MachO32 => FileInternal::MachO32(macho::MachOFile32::parse_at(data, offset)?),
#[cfg(feature = "macho")]
FileKind::MachO64 => FileInternal::MachO64(macho::MachOFile64::parse_at(data, offset)?),
#[allow(unreachable_patterns)]
/// Parse a Mach-O image from the dyld shared cache.
#[cfg(feature = "macho")]
pub fn parse_dyld_cache_image<'cache, E: Endian>(
image: &macho::DyldCacheImage<'data, 'cache, E, R>,
) -> Result<Self> {
let inner = match image.cache.architecture().address_size() {
Some(AddressSize::U64) => {
FileInternal::MachO64(macho::MachOFile64::parse_dyld_cache_image(image)?)
}
Some(AddressSize::U32) => {
FileInternal::MachO32(macho::MachOFile32::parse_dyld_cache_image(image)?)
}
_ => return Err(Error("Unsupported file format")),
};
#[allow(unreachable_code)]
Ok(File { inner: _inner })
Ok(File { inner })
}

/// Return the file format.
Expand Down
Loading

0 comments on commit 522b160

Please sign in to comment.