@@ -15,6 +15,7 @@ use crate::input_data::InputRef;
15
15
use crate :: input_data:: PRELUDE_FILE_ID ;
16
16
use crate :: input_data:: UNINITIALISED_FILE_ID ;
17
17
use crate :: output_section_id:: CustomSectionDetails ;
18
+ use crate :: output_section_id:: OutputSectionId ;
18
19
use crate :: output_section_id:: OutputSections ;
19
20
use crate :: output_section_id:: OutputSectionsBuilder ;
20
21
use crate :: output_section_id:: SectionName ;
@@ -43,7 +44,11 @@ use linker_utils::elf::SectionFlags;
43
44
use linker_utils:: elf:: SectionType ;
44
45
use object:: read:: elf:: Sym as _;
45
46
use object:: LittleEndian ;
47
+ use rayon:: iter:: ParallelBridge ;
48
+ use rayon:: iter:: ParallelIterator ;
49
+ use std:: collections:: HashMap ;
46
50
use std:: fmt:: Display ;
51
+ use std:: hash:: Hash ;
47
52
use std:: sync:: atomic:: AtomicBool ;
48
53
use std:: sync:: atomic:: Ordering ;
49
54
use std:: thread:: Thread ;
@@ -459,10 +464,12 @@ pub(crate) struct MergeStringsFileSection<'data> {
459
464
pub ( crate ) section_data : & ' data [ u8 ] ,
460
465
}
461
466
467
/// Number of hash buckets that the strings of each merge section are split into. A string goes
/// into bucket `hash % MERGE_STRING_BUCKETS`, which lets the buckets be merged independently of
/// one another.
///
/// Arrays of this length are built via `Default`, which the standard library only implements for
/// arrays of up to 32 elements, so raising this value requires changing how those arrays are
/// constructed.
const MERGE_STRING_BUCKETS: usize = 32;
468
+
462
469
/// Information about a string-merge section prior to merging.
463
470
pub ( crate ) struct UnresolvedMergeStringsFileSection < ' data > {
464
471
section_index : object:: SectionIndex ,
465
- strings : Vec < PreHashed < StringToMerge < ' data > > > ,
472
+ buckets : [ Vec < PreHashed < StringToMerge < ' data > > > ; MERGE_STRING_BUCKETS ] ,
466
473
}
467
474
468
475
#[ derive( PartialEq , Eq , Clone , Copy , Debug ) ]
@@ -471,7 +478,7 @@ pub(crate) struct StringToMerge<'data> {
471
478
}
472
479
473
480
#[ derive( Default ) ]
474
- pub ( crate ) struct MergeStringsSection < ' data > {
481
+ pub ( crate ) struct MergeStringsSectionBucket < ' data > {
475
482
/// The strings in this section in order. Includes null terminators.
476
483
pub ( crate ) strings : Vec < & ' data [ u8 ] > ,
477
484
@@ -486,9 +493,9 @@ pub(crate) struct MergeStringsSection<'data> {
486
493
pub ( crate ) string_offsets : PassThroughHashMap < StringToMerge < ' data > , u64 > ,
487
494
}
488
495
489
- impl < ' data > MergeStringsSection < ' data > {
496
+ impl < ' data > MergeStringsSectionBucket < ' data > {
490
497
/// Adds `string`, deduplicating with an existing string if an identical string is already
491
- /// present. Returns the offset into the section .
498
+ /// present. Returns the offset within this bucket .
492
499
fn add_string ( & mut self , string : PreHashed < StringToMerge < ' data > > ) -> u64 {
493
500
self . totally_added += string. bytes . len ( ) ;
494
501
* self . string_offsets . entry ( string) . or_insert_with ( || {
@@ -508,14 +515,47 @@ impl<'data> MergeStringsSection<'data> {
508
515
}
509
516
}
510
517
518
+ #[ derive( Default ) ]
519
+ pub ( crate ) struct MergeStringsSection < ' data > {
520
+ /// The buckets based on the hash value of the input string.
521
+ pub ( crate ) buckets : [ MergeStringsSectionBucket < ' data > ; MERGE_STRING_BUCKETS ] ,
522
+
523
+ /// The byte offset of each bucket in the final section.
524
+ pub ( crate ) bucket_offsets : [ u64 ; MERGE_STRING_BUCKETS ] ,
525
+ }
526
+
527
+ impl < ' data > MergeStringsSection < ' data > {
528
+ pub ( crate ) fn get ( & self , string : & PreHashed < StringToMerge < ' data > > ) -> Option < u64 > {
529
+ let bucket_index = ( string. hash ( ) as usize ) % MERGE_STRING_BUCKETS ;
530
+ self . buckets [ bucket_index]
531
+ . get ( string)
532
+ . map ( |offset| self . bucket_offsets [ bucket_index] + offset)
533
+ }
534
+
535
+ pub ( crate ) fn len ( & self ) -> u64 {
536
+ self . bucket_offsets [ MERGE_STRING_BUCKETS - 1 ]
537
+ + self . buckets [ MERGE_STRING_BUCKETS - 1 ] . next_offset
538
+ }
539
+
540
+ pub ( crate ) fn totally_added ( & self ) -> usize {
541
+ self . buckets . iter ( ) . map ( |b| b. totally_added ) . sum ( )
542
+ }
543
+
544
+ pub ( crate ) fn string_count ( & self ) -> usize {
545
+ self . buckets . iter ( ) . map ( |b| b. strings . len ( ) ) . sum ( )
546
+ }
547
+ }
548
+
511
549
/// Merges identical strings from all loaded objects where those strings are from input sections
512
550
/// that are marked with both the SHF_MERGE and SHF_STRINGS flags.
513
551
#[ tracing:: instrument( skip_all, name = "Merge strings" ) ]
514
552
fn merge_strings < ' data > (
515
553
resolved : & mut [ ResolvedGroup < ' data > ] ,
516
554
output_sections : & OutputSections ,
517
555
) -> Result < OutputSectionMap < MergeStringsSection < ' data > > > {
518
- let mut strings_by_section = output_sections. new_section_map :: < MergeStringsSection > ( ) ;
556
+ let mut worklist_per_section: HashMap < OutputSectionId , [ Vec < _ > ; MERGE_STRING_BUCKETS ] > =
557
+ HashMap :: new ( ) ;
558
+
519
559
for group in resolved {
520
560
for file in & mut group. files {
521
561
let ResolvedFile :: Object ( obj) = file else {
@@ -531,17 +571,47 @@ fn merge_strings<'data>(
531
571
bail ! ( "Internal error: expected SectionSlot::MergeStrings" ) ;
532
572
} ;
533
573
534
- let string_to_offset = strings_by_section. get_mut ( sec. part_id . output_section_id ( ) ) ;
535
- for string in & merge_info. strings {
536
- string_to_offset. add_string ( * string) ;
574
+ let id = sec. part_id . output_section_id ( ) ;
575
+ worklist_per_section. entry ( id) . or_default ( ) ;
576
+ for ( i, bucket) in worklist_per_section
577
+ . get_mut ( & id)
578
+ . unwrap ( )
579
+ . iter_mut ( )
580
+ . enumerate ( )
581
+ {
582
+ bucket. push ( & merge_info. buckets [ i] ) ;
537
583
}
538
584
}
539
585
}
540
586
}
541
587
588
+ let mut strings_by_section = output_sections. new_section_map :: < MergeStringsSection > ( ) ;
589
+
590
+ for ( section_id, buckets) in worklist_per_section. iter ( ) {
591
+ let merged_strings = strings_by_section. get_mut ( * section_id) ;
592
+
593
+ buckets
594
+ . iter ( )
595
+ . zip ( merged_strings. buckets . iter_mut ( ) )
596
+ . par_bridge ( )
597
+ . for_each ( |( string_lists, merged_strings) | {
598
+ for strings in string_lists {
599
+ for string in strings. iter ( ) {
600
+ merged_strings. add_string ( * string) ;
601
+ }
602
+ }
603
+ } ) ;
604
+
605
+ for i in 1 ..MERGE_STRING_BUCKETS {
606
+ merged_strings. bucket_offsets [ i] =
607
+ merged_strings. bucket_offsets [ i - 1 ] + merged_strings. buckets [ i - 1 ] . len ( ) ;
608
+ }
609
+ }
610
+
542
611
strings_by_section. for_each ( |section_id, sec| {
543
612
if sec. len ( ) > 0 {
544
- tracing:: debug!( section = ?output_sections. name( section_id) , size = sec. len( ) , sec. totally_added, strings = sec. strings. len( ) , "merge_strings" ) ;
613
+ tracing:: debug!( section = ?output_sections. name( section_id) , size = sec. len( ) ,
614
+ totally_added = sec. totally_added( ) , strings = sec. string_count( ) , "merge_strings" ) ;
545
615
}
546
616
} ) ;
547
617
@@ -962,13 +1032,14 @@ impl<'data> UnresolvedMergeStringsFileSection<'data> {
962
1032
section_index : object:: SectionIndex ,
963
1033
) -> Result < UnresolvedMergeStringsFileSection < ' data > > {
964
1034
let mut remaining = section_data;
965
- let mut strings = Vec :: new ( ) ;
1035
+ let mut buckets : [ Vec < PreHashed < StringToMerge > > ; MERGE_STRING_BUCKETS ] = Default :: default ( ) ;
966
1036
while !remaining. is_empty ( ) {
967
- strings. push ( StringToMerge :: take_hashed ( & mut remaining) ?) ;
1037
+ let string = StringToMerge :: take_hashed ( & mut remaining) ?;
1038
+ buckets[ ( string. hash ( ) as usize ) % MERGE_STRING_BUCKETS ] . push ( string) ;
968
1039
}
969
1040
Ok ( UnresolvedMergeStringsFileSection {
970
1041
section_index,
971
- strings ,
1042
+ buckets ,
972
1043
} )
973
1044
}
974
1045
}
0 commit comments