Skip to content

Commit

Permalink
Add a basic merged-string offset cache
Browse files Browse the repository at this point in the history
On my laptop, this reduces the time to link clang with debug info by about 39%.

Issue #117
  • Loading branch information
davidlattimore committed Sep 16, 2024
1 parent 77d3023 commit 32fad3a
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 9 deletions.
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions wild_lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ bytesize = "1.3.0"
flate2 = "1.0.33"
bumpalo-herd = "0.1.2"
zstd = "0.13.2"
fxhash = "0.2.1"

[dev-dependencies]
ar = "0.9.0"
Expand Down
38 changes: 29 additions & 9 deletions wild_lib/src/elf_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use crate::layout::GroupLayout;
use crate::layout::HeaderInfo;
use crate::layout::InternalSymbols;
use crate::layout::Layout;
use crate::layout::MergeOffsetCache;
use crate::layout::ObjectLayout;
use crate::layout::PreludeLayout;
use crate::layout::Resolution;
Expand Down Expand Up @@ -1113,13 +1114,14 @@ impl<'out> ObjectLayout<'out> {
) -> Result {
let _span = debug_span!("write_file", filename = ?self.input.file.filename).entered();
let _file_span = layout.args().trace_span_for_file(self.file_id);
let mut merge_offset_cache = MergeOffsetCache::new(&layout.output_sections);
for sec in &self.sections {
match sec {
SectionSlot::Loaded(sec) => {
self.write_section(layout, sec, buffers, table_writer)?
}
SectionSlot::LoadedDebugInfo(sec) => {
self.write_debug_section(layout, sec, buffers)?;
self.write_debug_section(layout, sec, buffers, &mut merge_offset_cache)?;
}
SectionSlot::EhFrameData(section_index) => {
self.write_eh_frame_data(*section_index, layout, table_writer)?;
Expand Down Expand Up @@ -1192,9 +1194,10 @@ impl<'out> ObjectLayout<'out> {
layout: &Layout,
sec: &Section,
buffers: &mut OutputSectionPartMap<&mut [u8]>,
merge_offset_cache: &mut MergeOffsetCache,
) -> Result {
let out = self.write_section_raw(layout, sec, buffers)?;
self.apply_debug_relocations(out, sec, layout)
self.apply_debug_relocations(out, sec, layout, merge_offset_cache)
.with_context(|| {
format!(
"Failed to apply relocations in section `{}` of {}",
Expand Down Expand Up @@ -1336,6 +1339,7 @@ impl<'out> ObjectLayout<'out> {
layout,
out,
table_writer,
&mut MergeOffsetCache::no_caching(),
)
.with_context(|| {
format!(
Expand All @@ -1352,6 +1356,7 @@ impl<'out> ObjectLayout<'out> {
out: &mut [u8],
section: &Section,
layout: &Layout,
merge_offset_cache: &mut MergeOffsetCache,
) -> Result {
let object_section = self.object.section(section.index)?;
let section_name = self.object.section_name(object_section)?;
Expand All @@ -1375,13 +1380,21 @@ impl<'out> ObjectLayout<'out> {
.fetch_add(relocations.len() as u64, Relaxed);
for rel in relocations {
let offset_in_section = rel.r_offset.get(LittleEndian);
apply_debug_relocation(self, offset_in_section, rel, layout, tombstone_value, out)
.with_context(|| {
format!(
"Failed to apply {} at offset 0x{offset_in_section:x}",
self.display_relocation(rel, layout)
)
})?;
apply_debug_relocation(
self,
offset_in_section,
rel,
layout,
tombstone_value,
out,
merge_offset_cache,
)
.with_context(|| {
format!(
"Failed to apply {} at offset 0x{offset_in_section:x}",
self.display_relocation(rel, layout)
)
})?;
}
Ok(())
}
Expand Down Expand Up @@ -1506,6 +1519,7 @@ impl<'out> ObjectLayout<'out> {
layout,
entry_out,
table_writer,
&mut MergeOffsetCache::no_caching(),
)
.with_context(|| {
format!(
Expand Down Expand Up @@ -1600,6 +1614,7 @@ fn apply_relocation(
layout: &Layout,
out: &mut [u8],
table_writer: &mut TableWriter,
merge_offset_cache: &mut MergeOffsetCache,
) -> Result<RelocationModifier> {
let section_address = section_info.section_address;
let place = section_address + offset_in_section;
Expand Down Expand Up @@ -1659,6 +1674,7 @@ fn apply_relocation(
object_layout,
&layout.merged_strings,
&layout.merged_string_start_addresses,
merge_offset_cache,
)?
.wrapping_sub(place)
.wrapping_sub(rel_info.byte_size as u64),
Expand Down Expand Up @@ -1721,6 +1737,7 @@ fn apply_debug_relocation(
layout: &Layout,
section_tombstone_value: u64,
out: &mut [u8],
merge_offset_cache: &mut MergeOffsetCache,
) -> Result<()> {
let e = LittleEndian;
let symbol_index = rel
Expand Down Expand Up @@ -1749,6 +1766,7 @@ fn apply_debug_relocation(
object_layout,
&layout.merged_strings,
&layout.merged_string_start_addresses,
merge_offset_cache,
)?,
RelocationKind::DtpOff => resolution
.value()
Expand All @@ -1766,6 +1784,7 @@ fn apply_debug_relocation(
&layout.merged_strings,
&layout.merged_string_start_addresses,
false,
merge_offset_cache,
)?
.context("Cannot get merged string offset for a debug info section")?,
SectionSlot::Discard | SectionSlot::Unloaded(..) => section_tombstone_value,
Expand Down Expand Up @@ -1814,6 +1833,7 @@ fn write_absolute_relocation(
object_layout,
&layout.merged_strings,
&layout.merged_string_start_addresses,
&mut MergeOffsetCache::no_caching(),
)
}
}
Expand Down
42 changes: 42 additions & 0 deletions wild_lib/src/layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ use anyhow::ensure;
use anyhow::Context;
use bitflags::bitflags;
use crossbeam_queue::ArrayQueue;
use fxhash::FxHashMap;
use itertools::Itertools;
use linker_utils::elf::shf;
use linker_utils::elf::SectionFlags;
Expand Down Expand Up @@ -3251,6 +3252,7 @@ impl<'data> ObjectLayoutState<'data> {
resources.merged_strings,
resources.merged_string_start_addresses,
true,
&mut MergeOffsetCache::no_caching(),
)?
.ok_or_else(|| {
anyhow!(
Expand Down Expand Up @@ -3695,6 +3697,7 @@ impl Resolution {
object_layout: &ObjectLayout,
merged_strings: &OutputSectionMap<resolution::MergeStringsSection>,
merged_string_start_addresses: &MergedStringStartAddresses,
merge_offset_cache: &mut MergeOffsetCache,
) -> Result<u64> {
// For most symbols, `raw_value` won't be zero, so we can save ourselves from looking up the
// section to see if it's a string-merge section. For string-merge symbols with names,
Expand All @@ -3708,6 +3711,7 @@ impl Resolution {
merged_strings,
merged_string_start_addresses,
false,
merge_offset_cache,
)? {
if self.raw_value != 0 {
bail!("Merged string resolution has value 0x{}", self.raw_value);
Expand All @@ -3729,6 +3733,7 @@ pub(crate) fn get_merged_string_output_address(
merged_strings: &OutputSectionMap<resolution::MergeStringsSection>,
merged_string_start_addresses: &MergedStringStartAddresses,
zero_unnamed: bool,
merge_offset_cache: &mut MergeOffsetCache,
) -> Result<Option<u64>> {
let symbol = object.symbol(symbol_index)?;
let Some(section_index) = object.symbol_section(symbol, symbol_index)? else {
Expand All @@ -3740,6 +3745,18 @@ pub(crate) fn get_merged_string_output_address(
let data = merge_slot.section_data;
let mut input_offset = symbol.st_value(LittleEndian);

let cache_entry = if let Some(section_cache) = merge_offset_cache
.offsets_by_section
.get_mut(merge_slot.part_id.output_section_id().as_usize())
{
match section_cache.entry(input_offset) {
std::collections::hash_map::Entry::Occupied(entry) => return Ok(Some(*entry.get())),
std::collections::hash_map::Entry::Vacant(entry) => Some(entry),
}
} else {
None
};

// When we reference data in a string-merge section via a named symbol, we determine which
// string we're referencing without taking the addend into account, then apply the addend
// afterward. However when the reference is to a section (a symbol without a name), we take the
Expand Down Expand Up @@ -3774,6 +3791,9 @@ pub(crate) fn get_merged_string_output_address(
if symbol_has_name {
address = address.wrapping_add(addend);
}
if let Some(cache_entry) = cache_entry {
cache_entry.insert(address);
}
Ok(Some(address))
}

Expand Down Expand Up @@ -4319,6 +4339,28 @@ impl Display for ResolutionFlags {
}
}

/// Cache of already-resolved merged-string output addresses, consulted by
/// `get_merged_string_output_address` before it does the full lookup.
///
/// An instance with an empty vector (see `no_caching`) never produces a hit and never stores
/// anything, because the per-section lookup simply finds no map for any section index.
///
/// NOTE(review): the lookup key is the symbol's `st_value` only, while the address that gets
/// stored can already have the relocation addend applied. Confirm that two lookups with the same
/// symbol value but different addends can never share an entry.
pub(crate) struct MergeOffsetCache {
/// One map per output section, indexed by the output section ID converted with `as_usize()`.
/// Each map goes from an input offset within a string-merge section to the resolved output
/// address.
offsets_by_section: Vec<FxHashMap<u64, u64>>,
}

impl MergeOffsetCache {
    /// Builds a cache holding one empty offset map for every output section reported by
    /// `output_sections.num_sections()`.
    pub(crate) fn new(output_sections: &OutputSections) -> Self {
        let section_count = output_sections.num_sections();
        Self {
            offsets_by_section: (0..section_count)
                .map(|_| FxHashMap::default())
                .collect(),
        }
    }

    /// Builds a cache with no per-section maps at all. Lookups against it find no map for any
    /// section, so nothing is ever returned from, or stored into, this instance.
    pub(crate) fn no_caching() -> MergeOffsetCache {
        let offsets_by_section = Vec::new();
        Self { offsets_by_section }
    }
}

/// Verifies that the code that allocates space for resolutions is consistent with the code that
/// writes those resolutions. e.g. we don't allocate too little or too much space.
#[test]
Expand Down

0 comments on commit 32fad3a

Please sign in to comment.