Skip to content

Commit 8e60813

Browse files
Add a basic merged-string offset cache
On my laptop, this reduces the time to link clang with debug info by about 39%. Issue #117
1 parent 77d3023 commit 8e60813

File tree

4 files changed

+84
-9
lines changed

4 files changed

+84
-9
lines changed

Cargo.lock

+10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

wild_lib/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ bytesize = "1.3.0"
3434
flate2 = "1.0.33"
3535
bumpalo-herd = "0.1.2"
3636
zstd = "0.13.2"
37+
fxhash = "0.2.1"
3738

3839
[dev-dependencies]
3940
ar = "0.9.0"

wild_lib/src/elf_writer.rs

+29-9
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use crate::layout::PreludeLayout;
3232
use crate::layout::Resolution;
3333
use crate::layout::ResolutionFlags;
3434
use crate::layout::Section;
35+
use crate::layout::StringOffsetCache;
3536
use crate::layout::SymbolCopyInfo;
3637
use crate::output_section_id;
3738
use crate::output_section_id::OrderEvent;
@@ -1113,13 +1114,14 @@ impl<'out> ObjectLayout<'out> {
11131114
) -> Result {
11141115
let _span = debug_span!("write_file", filename = ?self.input.file.filename).entered();
11151116
let _file_span = layout.args().trace_span_for_file(self.file_id);
1117+
let mut string_offset_cache = StringOffsetCache::new(&layout.output_sections);
11161118
for sec in &self.sections {
11171119
match sec {
11181120
SectionSlot::Loaded(sec) => {
11191121
self.write_section(layout, sec, buffers, table_writer)?
11201122
}
11211123
SectionSlot::LoadedDebugInfo(sec) => {
1122-
self.write_debug_section(layout, sec, buffers)?;
1124+
self.write_debug_section(layout, sec, buffers, &mut string_offset_cache)?;
11231125
}
11241126
SectionSlot::EhFrameData(section_index) => {
11251127
self.write_eh_frame_data(*section_index, layout, table_writer)?;
@@ -1192,9 +1194,10 @@ impl<'out> ObjectLayout<'out> {
11921194
layout: &Layout,
11931195
sec: &Section,
11941196
buffers: &mut OutputSectionPartMap<&mut [u8]>,
1197+
string_offset_cache: &mut StringOffsetCache,
11951198
) -> Result {
11961199
let out = self.write_section_raw(layout, sec, buffers)?;
1197-
self.apply_debug_relocations(out, sec, layout)
1200+
self.apply_debug_relocations(out, sec, layout, string_offset_cache)
11981201
.with_context(|| {
11991202
format!(
12001203
"Failed to apply relocations in section `{}` of {}",
@@ -1336,6 +1339,7 @@ impl<'out> ObjectLayout<'out> {
13361339
layout,
13371340
out,
13381341
table_writer,
1342+
&mut StringOffsetCache::no_caching(),
13391343
)
13401344
.with_context(|| {
13411345
format!(
@@ -1352,6 +1356,7 @@ impl<'out> ObjectLayout<'out> {
13521356
out: &mut [u8],
13531357
section: &Section,
13541358
layout: &Layout,
1359+
string_offset_cache: &mut StringOffsetCache,
13551360
) -> Result {
13561361
let object_section = self.object.section(section.index)?;
13571362
let section_name = self.object.section_name(object_section)?;
@@ -1375,13 +1380,21 @@ impl<'out> ObjectLayout<'out> {
13751380
.fetch_add(relocations.len() as u64, Relaxed);
13761381
for rel in relocations {
13771382
let offset_in_section = rel.r_offset.get(LittleEndian);
1378-
apply_debug_relocation(self, offset_in_section, rel, layout, tombstone_value, out)
1379-
.with_context(|| {
1380-
format!(
1381-
"Failed to apply {} at offset 0x{offset_in_section:x}",
1382-
self.display_relocation(rel, layout)
1383-
)
1384-
})?;
1383+
apply_debug_relocation(
1384+
self,
1385+
offset_in_section,
1386+
rel,
1387+
layout,
1388+
tombstone_value,
1389+
out,
1390+
string_offset_cache,
1391+
)
1392+
.with_context(|| {
1393+
format!(
1394+
"Failed to apply {} at offset 0x{offset_in_section:x}",
1395+
self.display_relocation(rel, layout)
1396+
)
1397+
})?;
13851398
}
13861399
Ok(())
13871400
}
@@ -1506,6 +1519,7 @@ impl<'out> ObjectLayout<'out> {
15061519
layout,
15071520
entry_out,
15081521
table_writer,
1522+
&mut StringOffsetCache::no_caching(),
15091523
)
15101524
.with_context(|| {
15111525
format!(
@@ -1600,6 +1614,7 @@ fn apply_relocation(
16001614
layout: &Layout,
16011615
out: &mut [u8],
16021616
table_writer: &mut TableWriter,
1617+
string_offset_cache: &mut StringOffsetCache,
16031618
) -> Result<RelocationModifier> {
16041619
let section_address = section_info.section_address;
16051620
let place = section_address + offset_in_section;
@@ -1659,6 +1674,7 @@ fn apply_relocation(
16591674
object_layout,
16601675
&layout.merged_strings,
16611676
&layout.merged_string_start_addresses,
1677+
string_offset_cache,
16621678
)?
16631679
.wrapping_sub(place)
16641680
.wrapping_sub(rel_info.byte_size as u64),
@@ -1721,6 +1737,7 @@ fn apply_debug_relocation(
17211737
layout: &Layout,
17221738
section_tombstone_value: u64,
17231739
out: &mut [u8],
1740+
string_offset_cache: &mut StringOffsetCache,
17241741
) -> Result<()> {
17251742
let e = LittleEndian;
17261743
let symbol_index = rel
@@ -1749,6 +1766,7 @@ fn apply_debug_relocation(
17491766
object_layout,
17501767
&layout.merged_strings,
17511768
&layout.merged_string_start_addresses,
1769+
string_offset_cache,
17521770
)?,
17531771
RelocationKind::DtpOff => resolution
17541772
.value()
@@ -1766,6 +1784,7 @@ fn apply_debug_relocation(
17661784
&layout.merged_strings,
17671785
&layout.merged_string_start_addresses,
17681786
false,
1787+
string_offset_cache,
17691788
)?
17701789
.context("Cannot get merged string offset for a debug info section")?,
17711790
SectionSlot::Discard | SectionSlot::Unloaded(..) => section_tombstone_value,
@@ -1814,6 +1833,7 @@ fn write_absolute_relocation(
18141833
object_layout,
18151834
&layout.merged_strings,
18161835
&layout.merged_string_start_addresses,
1836+
&mut StringOffsetCache::no_caching(),
18171837
)
18181838
}
18191839
}

wild_lib/src/layout.rs

+44
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ use anyhow::ensure;
5858
use anyhow::Context;
5959
use bitflags::bitflags;
6060
use crossbeam_queue::ArrayQueue;
61+
use fxhash::FxHashMap;
6162
use itertools::Itertools;
6263
use linker_utils::elf::shf;
6364
use linker_utils::elf::SectionFlags;
@@ -3251,6 +3252,7 @@ impl<'data> ObjectLayoutState<'data> {
32513252
resources.merged_strings,
32523253
resources.merged_string_start_addresses,
32533254
true,
3255+
&mut StringOffsetCache::no_caching(),
32543256
)?
32553257
.ok_or_else(|| {
32563258
anyhow!(
@@ -3695,6 +3697,7 @@ impl Resolution {
36953697
object_layout: &ObjectLayout,
36963698
merged_strings: &OutputSectionMap<resolution::MergeStringsSection>,
36973699
merged_string_start_addresses: &MergedStringStartAddresses,
3700+
string_offset_cache: &mut StringOffsetCache,
36983701
) -> Result<u64> {
36993702
// For most symbols, `raw_value` won't be zero, so we can save ourselves from looking up the
37003703
// section to see if it's a string-merge section. For string-merge symbols with names,
@@ -3708,6 +3711,7 @@ impl Resolution {
37083711
merged_strings,
37093712
merged_string_start_addresses,
37103713
false,
3714+
string_offset_cache,
37113715
)? {
37123716
if self.raw_value != 0 {
37133717
bail!("Merged string resolution has value 0x{}", self.raw_value);
@@ -3729,6 +3733,7 @@ pub(crate) fn get_merged_string_output_address(
37293733
merged_strings: &OutputSectionMap<resolution::MergeStringsSection>,
37303734
merged_string_start_addresses: &MergedStringStartAddresses,
37313735
zero_unnamed: bool,
3736+
string_offset_cache: &mut StringOffsetCache,
37323737
) -> Result<Option<u64>> {
37333738
let symbol = object.symbol(symbol_index)?;
37343739
let Some(section_index) = object.symbol_section(symbol, symbol_index)? else {
@@ -3740,6 +3745,18 @@ pub(crate) fn get_merged_string_output_address(
37403745
let data = merge_slot.section_data;
37413746
let mut input_offset = symbol.st_value(LittleEndian);
37423747

3748+
let cache_entry = if let Some(section_cache) = string_offset_cache
3749+
.offsets_by_section
3750+
.get_mut(merge_slot.part_id.output_section_id().as_usize())
3751+
{
3752+
match section_cache.entry(input_offset) {
3753+
std::collections::hash_map::Entry::Occupied(entry) => return Ok(Some(*entry.get())),
3754+
std::collections::hash_map::Entry::Vacant(entry) => Some(entry),
3755+
}
3756+
} else {
3757+
None
3758+
};
3759+
37433760
// When we reference data in a string-merge section via a named symbol, we determine which
37443761
// string we're referencing without taking the addend into account, then apply the addend
37453762
// afterward. However when the reference is to a section (a symbol without a name), we take the
@@ -3774,6 +3791,9 @@ pub(crate) fn get_merged_string_output_address(
37743791
if symbol_has_name {
37753792
address = address.wrapping_add(addend);
37763793
}
3794+
if let Some(cache_entry) = cache_entry {
3795+
cache_entry.insert(address);
3796+
}
37773797
Ok(Some(address))
37783798
}
37793799

@@ -4319,6 +4339,30 @@ impl Display for ResolutionFlags {
43194339
}
43204340
}
43214341

4342+
pub(crate) struct StringOffsetCache {
4343+
/// For each output section, a map from input offset to output offset. Empty if caching is
4344+
/// disabled.
4345+
offsets_by_section: Vec<FxHashMap<u64, u64>>,
4346+
}
4347+
4348+
impl StringOffsetCache {
4349+
pub(crate) fn new(output_sections: &OutputSections) -> Self {
4350+
Self {
4351+
offsets_by_section: vec![
4352+
FxHashMap::with_hasher(fxhash::FxBuildHasher::default());
4353+
output_sections.num_sections()
4354+
],
4355+
}
4356+
}
4357+
4358+
/// Returns an instance that doesn't cache.
4359+
pub(crate) fn no_caching() -> StringOffsetCache {
4360+
Self {
4361+
offsets_by_section: Vec::new(),
4362+
}
4363+
}
4364+
}
4365+
43224366
/// Verifies that the code that allocates space for resolutions is consistent with the code that
43234367
/// writes those resolutions. e.g. we don't allocate too little or too much space.
43244368
#[test]

0 commit comments

Comments
 (0)