Skip to content

Commit e1fdeca

Browse files
committed
Implement parallel string merging using buckets
Implements: davidlattimore#117
1 parent b866f45 commit e1fdeca

File tree

5 files changed

+104
-42
lines changed

5 files changed

+104
-42
lines changed

Diff for: wild/tests/integration_tests.rs

+12-24
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,9 @@
55
//! that are tested by examining the resulting binaries. Directives have the format '//#Directive:
66
//! Args'.
77
//!
8-
//! ExpectComment: Checks that the the next comment in the .comment section is equal to the supplied
8+
//! ExpectComment: Checks that the comment in the .comment section is equal to the supplied
99
//! argument. If no ExpectComment directives are given then .comment isn't checked. The argument may
10-
//! end with '*' which matches anything. The last ExpectComment directive may start with '?' to
11-
//! indicate that the comment if present should match the rest of the argument, but that it's OK for
12-
//! it to be absent.
10+
//! end with '*' which matches anything.
1311
//!
1412
//! TODO: Document the rest of the directives.
1513
@@ -973,29 +971,19 @@ impl Assertions {
973971
return Ok(());
974972
}
975973
let actual_comments = read_comments(obj)?;
976-
let mut actual_comments_iter = actual_comments.iter();
977-
let mut expected_comments = self.expected_comments.iter();
978-
loop {
979-
match (expected_comments.next(), actual_comments_iter.next()) {
980-
(None, None) => break,
981-
(None, Some(a)) => bail!("Unexpected .comment `{a}`"),
982-
(Some(e), None) => {
983-
if !e.starts_with('?') {
984-
bail!("Missing expected .comment `{e}`")
985-
}
986-
}
987-
(Some(e), Some(a)) => {
988-
let e = e.strip_prefix('?').unwrap_or(e.as_str());
989-
if let Some(prefix) = e.strip_suffix('*') {
990-
if !a.starts_with(prefix) {
991-
bail!("Expected .comment starting with `{prefix}`, got `{a}`");
992-
}
993-
} else if e != a {
994-
bail!("Expected .comment `{e}`, got `{a}`");
995-
}
974+
for expected in self.expected_comments.iter() {
975+
if let Some(expected) = expected.strip_suffix('*') {
976+
if !actual_comments
977+
.iter()
978+
.any(|actual| actual.starts_with(expected))
979+
{
980+
bail!("Expected .comment starting with `{expected}`");
996981
}
982+
} else if !actual_comments.iter().any(|actual| actual == expected) {
983+
bail!("Expected .comment `{expected}`");
997984
}
998985
}
986+
999987
Ok(())
1000988
}
1001989

Diff for: wild/tests/sources/comments.c

-3
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,3 @@ void _start(void) {
2222
//#ExpectComment:GCC*
2323
//#ExpectComment:Foo
2424
//#ExpectComment:Bar
25-
26-
// Our linker writes this, but GNU ld doesn't, so it's optional.
27-
//#ExpectComment:?Linker*

Diff for: wild_lib/src/elf_writer.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -1922,9 +1922,11 @@ impl PreludeLayout {
19221922
if merged.len() > 0 {
19231923
let buffer =
19241924
buffers.get_mut(section_id.part_id_with_alignment(crate::alignment::MIN));
1925-
for string in &merged.strings {
1926-
let dest = crate::slice::slice_take_prefix_mut(buffer, string.len());
1927-
dest.copy_from_slice(string)
1925+
for bucket in &merged.buckets {
1926+
for string in &bucket.strings {
1927+
let dest = crate::slice::slice_take_prefix_mut(buffer, string.len());
1928+
dest.copy_from_slice(string)
1929+
}
19281930
}
19291931
}
19301932
});

Diff for: wild_lib/src/hash.rs

+4
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ impl<T> PreHashed<T> {
5454
pub(crate) fn new(value: T, hash: u64) -> Self {
5555
Self { value, hash }
5656
}
57+
58+
pub(crate) fn hash(&self) -> u64 {
59+
self.hash
60+
}
5761
}
5862

5963
impl<T> std::hash::Hash for PreHashed<T> {

Diff for: wild_lib/src/resolution.rs

+83-12
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use crate::input_data::InputRef;
1515
use crate::input_data::PRELUDE_FILE_ID;
1616
use crate::input_data::UNINITIALISED_FILE_ID;
1717
use crate::output_section_id::CustomSectionDetails;
18+
use crate::output_section_id::OutputSectionId;
1819
use crate::output_section_id::OutputSections;
1920
use crate::output_section_id::OutputSectionsBuilder;
2021
use crate::output_section_id::SectionName;
@@ -43,7 +44,11 @@ use linker_utils::elf::SectionFlags;
4344
use linker_utils::elf::SectionType;
4445
use object::read::elf::Sym as _;
4546
use object::LittleEndian;
47+
use rayon::iter::ParallelBridge;
48+
use rayon::iter::ParallelIterator;
49+
use std::collections::HashMap;
4650
use std::fmt::Display;
51+
use std::hash::Hash;
4752
use std::sync::atomic::AtomicBool;
4853
use std::sync::atomic::Ordering;
4954
use std::thread::Thread;
@@ -459,10 +464,12 @@ pub(crate) struct MergeStringsFileSection<'data> {
459464
pub(crate) section_data: &'data [u8],
460465
}
461466

467+
const MERGE_STRING_BUCKETS: usize = 32;
468+
462469
/// Information about a string-merge section prior to merging.
463470
pub(crate) struct UnresolvedMergeStringsFileSection<'data> {
464471
section_index: object::SectionIndex,
465-
strings: Vec<PreHashed<StringToMerge<'data>>>,
472+
buckets: [Vec<PreHashed<StringToMerge<'data>>>; MERGE_STRING_BUCKETS],
466473
}
467474

468475
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
@@ -471,7 +478,7 @@ pub(crate) struct StringToMerge<'data> {
471478
}
472479

473480
#[derive(Default)]
474-
pub(crate) struct MergeStringsSection<'data> {
481+
pub(crate) struct MergeStringsSectionBucket<'data> {
475482
/// The strings in this section in order. Includes null terminators.
476483
pub(crate) strings: Vec<&'data [u8]>,
477484

@@ -486,9 +493,9 @@ pub(crate) struct MergeStringsSection<'data> {
486493
pub(crate) string_offsets: PassThroughHashMap<StringToMerge<'data>, u64>,
487494
}
488495

489-
impl<'data> MergeStringsSection<'data> {
496+
impl<'data> MergeStringsSectionBucket<'data> {
490497
/// Adds `string`, deduplicating with an existing string if an identical string is already
491-
/// present. Returns the offset into the section.
498+
/// present. Returns the offset within this bucket.
492499
fn add_string(&mut self, string: PreHashed<StringToMerge<'data>>) -> u64 {
493500
self.totally_added += string.bytes.len();
494501
*self.string_offsets.entry(string).or_insert_with(|| {
@@ -508,14 +515,47 @@ impl<'data> MergeStringsSection<'data> {
508515
}
509516
}
510517

518+
#[derive(Default)]
519+
pub(crate) struct MergeStringsSection<'data> {
520+
/// The buckets based on the hash value of the input string.
521+
pub(crate) buckets: [MergeStringsSectionBucket<'data>; MERGE_STRING_BUCKETS],
522+
523+
/// The byte offset of each bucket in the final section.
524+
pub(crate) bucket_offsets: [u64; MERGE_STRING_BUCKETS],
525+
}
526+
527+
impl<'data> MergeStringsSection<'data> {
528+
pub(crate) fn get(&self, string: &PreHashed<StringToMerge<'data>>) -> Option<u64> {
529+
let bucket_index = (string.hash() as usize) % MERGE_STRING_BUCKETS;
530+
self.buckets[bucket_index]
531+
.get(string)
532+
.map(|offset| self.bucket_offsets[bucket_index] + offset)
533+
}
534+
535+
pub(crate) fn len(&self) -> u64 {
536+
self.bucket_offsets[MERGE_STRING_BUCKETS - 1]
537+
+ self.buckets[MERGE_STRING_BUCKETS - 1].next_offset
538+
}
539+
540+
pub(crate) fn totally_added(&self) -> usize {
541+
self.buckets.iter().map(|b| b.totally_added).sum()
542+
}
543+
544+
pub(crate) fn string_count(&self) -> usize {
545+
self.buckets.iter().map(|b| b.strings.len()).sum()
546+
}
547+
}
548+
511549
/// Merges identical strings from all loaded objects where those strings are from input sections
512550
/// that are marked with both the SHF_MERGE and SHF_STRINGS flags.
513551
#[tracing::instrument(skip_all, name = "Merge strings")]
514552
fn merge_strings<'data>(
515553
resolved: &mut [ResolvedGroup<'data>],
516554
output_sections: &OutputSections,
517555
) -> Result<OutputSectionMap<MergeStringsSection<'data>>> {
518-
let mut strings_by_section = output_sections.new_section_map::<MergeStringsSection>();
556+
let mut worklist_per_section: HashMap<OutputSectionId, [Vec<_>; MERGE_STRING_BUCKETS]> =
557+
HashMap::new();
558+
519559
for group in resolved {
520560
for file in &mut group.files {
521561
let ResolvedFile::Object(obj) = file else {
@@ -531,17 +571,47 @@ fn merge_strings<'data>(
531571
bail!("Internal error: expected SectionSlot::MergeStrings");
532572
};
533573

534-
let string_to_offset = strings_by_section.get_mut(sec.part_id.output_section_id());
535-
for string in &merge_info.strings {
536-
string_to_offset.add_string(*string);
574+
let id = sec.part_id.output_section_id();
575+
worklist_per_section.entry(id).or_default();
576+
for (i, bucket) in worklist_per_section
577+
.get_mut(&id)
578+
.unwrap()
579+
.iter_mut()
580+
.enumerate()
581+
{
582+
bucket.push(&merge_info.buckets[i]);
537583
}
538584
}
539585
}
540586
}
541587

588+
let mut strings_by_section = output_sections.new_section_map::<MergeStringsSection>();
589+
590+
for (section_id, buckets) in worklist_per_section.iter() {
591+
let merged_strings = strings_by_section.get_mut(*section_id);
592+
593+
buckets
594+
.iter()
595+
.zip(merged_strings.buckets.iter_mut())
596+
.par_bridge()
597+
.for_each(|(string_lists, merged_strings)| {
598+
for strings in string_lists {
599+
for string in strings.iter() {
600+
merged_strings.add_string(*string);
601+
}
602+
}
603+
});
604+
605+
for i in 1..MERGE_STRING_BUCKETS {
606+
merged_strings.bucket_offsets[i] =
607+
merged_strings.bucket_offsets[i - 1] + merged_strings.buckets[i - 1].len();
608+
}
609+
}
610+
542611
strings_by_section.for_each(|section_id, sec| {
543612
if sec.len() > 0 {
544-
tracing::debug!(section = ?output_sections.name(section_id), size = sec.len(), sec.totally_added, strings = sec.strings.len(), "merge_strings");
613+
tracing::debug!(section = ?output_sections.name(section_id), size = sec.len(),
614+
totally_added = sec.totally_added(), strings = sec.string_count(), "merge_strings");
545615
}
546616
});
547617

@@ -962,13 +1032,14 @@ impl<'data> UnresolvedMergeStringsFileSection<'data> {
9621032
section_index: object::SectionIndex,
9631033
) -> Result<UnresolvedMergeStringsFileSection<'data>> {
9641034
let mut remaining = section_data;
965-
let mut strings = Vec::new();
1035+
let mut buckets: [Vec<PreHashed<StringToMerge>>; MERGE_STRING_BUCKETS] = Default::default();
9661036
while !remaining.is_empty() {
967-
strings.push(StringToMerge::take_hashed(&mut remaining)?);
1037+
let string = StringToMerge::take_hashed(&mut remaining)?;
1038+
buckets[(string.hash() as usize) % MERGE_STRING_BUCKETS].push(string);
9681039
}
9691040
Ok(UnresolvedMergeStringsFileSection {
9701041
section_index,
971-
strings,
1042+
buckets,
9721043
})
9731044
}
9741045
}

0 commit comments

Comments
 (0)