From 79aef77c0babb051ffb06506a59bc5a2c35c6ca3 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 24 Aug 2019 13:23:17 +0200 Subject: [PATCH 1/7] [machO] Set section to max alignment of content decls --- src/artifact/decl.rs | 26 +++++++++++++++++++------- src/elf.rs | 4 ++-- src/mach.rs | 18 ++++++++++++++++-- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/src/artifact/decl.rs b/src/artifact/decl.rs index a5489d2..e1d50ff 100644 --- a/src/artifact/decl.rs +++ b/src/artifact/decl.rs @@ -147,16 +147,19 @@ macro_rules! align_methods { () => { /// Build alignment. Size is in bytes. If None, a default is chosen /// in the backend. - pub fn with_align(mut self, align: Option) -> Self { - self.align = align; + pub fn with_align(mut self, align: Option) -> Self { + self.set_align(align); self } /// Set alignment - pub fn set_align(&mut self, align: Option) { + pub fn set_align(&mut self, align: Option) { + if let Some(align) = align { + debug_assert_eq!(align.checked_next_power_of_two(), Some(align)); + } self.align = align; } /// Get alignment - pub fn get_align(&self) -> Option { + pub fn get_align(&self) -> Option { self.align } } @@ -215,6 +218,15 @@ impl DefinedDecl { DefinedDecl::Section(a) => a.is_writable(), } } + + /// Accessir to determine the minimal alignment + pub fn get_align(&self) -> Option { + match self { + DefinedDecl::Data(a) => a.get_align(), + DefinedDecl::Function(a) => a.get_align(), + DefinedDecl::Section(a) => a.get_align(), + } + } } impl Decl { @@ -387,7 +399,7 @@ impl Into for DataImportDecl { pub struct FunctionDecl { scope: Scope, visibility: Visibility, - align: Option, + align: Option, } impl Default for FunctionDecl { @@ -419,7 +431,7 @@ pub struct DataDecl { visibility: Visibility, writable: bool, datatype: DataType, - align: Option, + align: Option, } impl Default for DataDecl { @@ -487,7 +499,7 @@ pub enum SectionKind { pub struct SectionDecl { kind: SectionKind, datatype: DataType, - align: Option, + align: Option, } impl SectionDecl { diff --git a/src/elf.rs b/src/elf.rs index 7601b32..66cb7d1 100644 --- a/src/elf.rs +++ b/src/elf.rs @@ -202,7 +202,7 @@ struct SectionBuilder { alloc: bool, size: u64, name_offset: usize, - align: Option, + align: Option, } impl SectionBuilder { @@ -234,7 +234,7 @@ impl SectionBuilder { self } /// Specify section alignment - pub fn align(mut self, align: Option) -> Self { + pub fn align(mut self, align: Option) -> Self { self.align = align; self } diff --git a/src/mach.rs b/src/mach.rs index a740c63..73b4350 100644 --- a/src/mach.rs +++ b/src/mach.rs @@ -45,6 +45,16 @@ impl From for CpuType { } } +fn align_to_align_exp(align: u64) -> u64 { + assert!(align != 0); + assert!(align.is_power_of_two()); + let mut align_exp = 0; + while 1 << align_exp != align { + align_exp += 1; + } + align_exp +} + type SectionIndex = usize; type StrtableOffset = u64; @@ -422,11 +432,12 @@ impl SegmentBuilder { symbol_offset: &mut u64, section: SectionIndex, definitions: &[Definition], - alignment_exponent: u64, + min_alignment_exponent: u64, flags: Option, ) { let mut local_size = 0; let mut segment_relative_offset = 0; + let mut alignment_exponent = min_alignment_exponent; for def in definitions { if let DefinedDecl::Section { .. } = def.decl { unreachable!(); @@ -443,6 +454,9 @@ impl SegmentBuilder { ); *symbol_offset += def.data.len() as u64; segment_relative_offset += def.data.len() as u64; + if let Some(align) = def.decl.get_align() { + alignment_exponent = std::cmp::max(alignment_exponent, align_to_align_exp(align)); + } } let mut section = SectionBuilder::new(sectname.to_string(), segname, local_size) .offset(*offset) @@ -504,7 +518,7 @@ impl SegmentBuilder { let section = SectionBuilder::new(sectname, segment_name, local_size) .offset(*offset) .addr(*addr) - .align(1) + .align(align_to_align_exp(s.get_align().unwrap_or(1))) .flags(flags); *offset += local_size; *addr += local_size; From 4ca0c82cf26676ba4c8f85f593a9d9cc3b5a32fd Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 24 Aug 2019 14:42:29 +0200 Subject: [PATCH 2/7] [WIP] Correctly pad decls --- examples/prototype.rs | 2 +- src/artifact/decl.rs | 4 +-- src/mach.rs | 69 ++++++++++++++++++++++++++++++++++++++----- 3 files changed, 64 insertions(+), 11 deletions(-) diff --git a/examples/prototype.rs b/examples/prototype.rs index 161e447..c1c5d67 100644 --- a/examples/prototype.rs +++ b/examples/prototype.rs @@ -86,7 +86,7 @@ fn run (args: Args) -> Result<(), Error> { ("str.1", Decl::cstring().into()), ("DEADBEEF", Decl::data_import().into()), ("STATIC", Decl::data().global().writable().into()), - ("STATIC_REF", Decl::data().global().writable().into()), + ("STATIC_REF", Decl::data().global().writable().with_align(Some(64)).into()), ("printf", Decl::function_import().into()), ]; obj.declarations(declarations.into_iter())?; diff --git a/src/artifact/decl.rs b/src/artifact/decl.rs index e1d50ff..680ea00 100644 --- a/src/artifact/decl.rs +++ b/src/artifact/decl.rs @@ -147,8 +147,8 @@ macro_rules! align_methods { () => { /// Build alignment. Size is in bytes. If None, a default is chosen /// in the backend. - pub fn with_align(mut self, align: Option) -> Self { - self.set_align(align); + pub fn with_align(mut self, align: Option) -> Self { + self.set_align(align.map(|align| align as u64)); self } /// Set alignment diff --git a/src/mach.rs b/src/mach.rs index 73b4350..fc136ca 100644 --- a/src/mach.rs +++ b/src/mach.rs @@ -8,6 +8,7 @@ use failure::Error; use indexmap::IndexMap; use scroll::ctx::SizeWith; use scroll::{IOwrite, Pwrite}; +use std::collections::HashMap; use std::io::SeekFrom::*; use std::io::{BufWriter, Cursor, Seek, Write}; use string_interner::DefaultStringInterner; @@ -115,7 +116,6 @@ impl SymbolBuilder { self } /// Finalize and create the symbol - /// The n_value (offset into section) is still unset, and needs to be generated by the client pub fn create(self) -> Nlist { use goblin::mach::symbols::{NO_SECT, N_EXT, N_SECT, N_UNDF}; let n_strx = self.name; @@ -405,6 +405,7 @@ struct SegmentBuilder { /// A stupid offset value I need to refactor out pub offset: u64, size: u64, + align_pad_map: HashMap, } impl SegmentBuilder { @@ -434,29 +435,48 @@ impl SegmentBuilder { definitions: &[Definition], min_alignment_exponent: u64, flags: Option, + align_pad_map: &mut HashMap, + segment_relative_offset: &mut u64, ) { let mut local_size = 0; - let mut segment_relative_offset = 0; + let mut section_relative_offset = 0; let mut alignment_exponent = min_alignment_exponent; for def in definitions { if let DefinedDecl::Section { .. } = def.decl { unreachable!(); } - local_size += def.data.len() as u64; + + let def_alignment_exponent = std::cmp::max( + min_alignment_exponent, + align_to_align_exp(def.decl.get_align().unwrap_or(1)), + ); + alignment_exponent = std::cmp::max(alignment_exponent, def_alignment_exponent); + + let align_pad = (2 << def_alignment_exponent) - (*segment_relative_offset % (2 << def_alignment_exponent)); + let align_pad = if align_pad == (2 << def_alignment_exponent) { + 0 + } else { + align_pad + }; + align_pad_map.insert(def.name.to_string(), align_pad); + + *symbol_offset += align_pad; + section_relative_offset += align_pad; + *segment_relative_offset += align_pad; + + local_size += align_pad + def.data.len() as u64; symtab.insert( def.name, SymbolType::Defined { section, - segment_relative_offset, + segment_relative_offset: section_relative_offset, absolute_offset: *symbol_offset, global: def.decl.is_global(), }, ); *symbol_offset += def.data.len() as u64; - segment_relative_offset += def.data.len() as u64; - if let Some(align) = def.decl.get_align() { - alignment_exponent = std::cmp::max(alignment_exponent, align_to_align_exp(align)); - } + section_relative_offset += def.data.len() as u64; + *segment_relative_offset += def.data.len() as u64; } let mut section = SectionBuilder::new(sectname.to_string(), segname, local_size) .offset(*offset) @@ -539,6 +559,8 @@ impl SegmentBuilder { let mut size = 0; let mut symbol_offset = 0; let mut sections = IndexMap::new(); + let mut align_pad_map = HashMap::new(); + let mut segment_relative_offset = 0; Self::build_section( symtab, "__text", @@ -551,6 +573,8 @@ impl SegmentBuilder { &code, 4, Some(S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS), + &mut align_pad_map, + &mut segment_relative_offset, ); Self::build_section( symtab, @@ -564,6 +588,8 @@ impl SegmentBuilder { &data, 3, None, + &mut align_pad_map, + &mut segment_relative_offset, ); Self::build_section( symtab, @@ -577,6 +603,8 @@ impl SegmentBuilder { &cstrings, 0, Some(S_CSTRING_LITERALS), + &mut align_pad_map, + &mut segment_relative_offset, ); for (idx, def) in custom_sections.iter().enumerate() { Self::build_custom_section( @@ -602,6 +630,7 @@ impl SegmentBuilder { size, sections, offset, + align_pad_map, } } } @@ -755,6 +784,12 @@ impl<'a> Mach<'a> { // write code ////////////////////////////// for code in self.code { + if let Some(&align_pad) = self.segment.align_pad_map.get(code.name) { + for _ in 0..align_pad { + file.write_all(&[0xaa])?; + } + } + file.write_all(code.data)?; } debug!("SEEK: after code: {}", file.seek(Current(0))?); @@ -763,6 +798,12 @@ impl<'a> Mach<'a> { // write data ////////////////////////////// for data in self.data { + if let Some(&align_pad) = self.segment.align_pad_map.get(data.name) { + for _ in 0..align_pad { + file.write_all(&[0xaa])?; + } + } + file.write_all(data.data)?; } debug!("SEEK: after data: {}", file.seek(Current(0))?); @@ -771,6 +812,12 @@ impl<'a> Mach<'a> { // write cstrings ////////////////////////////// for cstring in self.cstrings { + if let Some(&align_pad) = self.segment.align_pad_map.get(cstring.name) { + for _ in 0..align_pad { + file.write_all(&[0xaa])?; + } + } + file.write_all(cstring.data)?; } debug!("SEEK: after cstrings: {}", file.seek(Current(0))?); @@ -779,6 +826,12 @@ impl<'a> Mach<'a> { // write custom sections ////////////////////////////// for section in self.sections { + if let Some(&align_pad) = self.segment.align_pad_map.get(section.name) { + for _ in 0..align_pad { + file.write_all(&[0xaa])?; + } + } + file.write_all(section.data)?; } debug!("SEEK: after custom sections: {}", file.seek(Current(0))?); From 92433fd3b786302cd656cd03c099795865ba231e Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 24 Aug 2019 17:07:11 +0200 Subject: [PATCH 3/7] Put the alignment padding after the decl instead of in front --- src/mach.rs | 67 +++++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/src/mach.rs b/src/mach.rs index fc136ca..242f73e 100644 --- a/src/mach.rs +++ b/src/mach.rs @@ -436,35 +436,16 @@ impl SegmentBuilder { min_alignment_exponent: u64, flags: Option, align_pad_map: &mut HashMap, - segment_relative_offset: &mut u64, ) { let mut local_size = 0; let mut section_relative_offset = 0; let mut alignment_exponent = min_alignment_exponent; - for def in definitions { + let mut def_iter = definitions.iter().peekable(); + while let Some(def) = def_iter.next() { if let DefinedDecl::Section { .. } = def.decl { unreachable!(); } - let def_alignment_exponent = std::cmp::max( - min_alignment_exponent, - align_to_align_exp(def.decl.get_align().unwrap_or(1)), - ); - alignment_exponent = std::cmp::max(alignment_exponent, def_alignment_exponent); - - let align_pad = (2 << def_alignment_exponent) - (*segment_relative_offset % (2 << def_alignment_exponent)); - let align_pad = if align_pad == (2 << def_alignment_exponent) { - 0 - } else { - align_pad - }; - align_pad_map.insert(def.name.to_string(), align_pad); - - *symbol_offset += align_pad; - section_relative_offset += align_pad; - *segment_relative_offset += align_pad; - - local_size += align_pad + def.data.len() as u64; symtab.insert( def.name, SymbolType::Defined { @@ -476,7 +457,25 @@ impl SegmentBuilder { ); *symbol_offset += def.data.len() as u64; section_relative_offset += def.data.len() as u64; - *segment_relative_offset += def.data.len() as u64; + local_size += def.data.len() as u64; + + let next_def_alignment_exponent = std::cmp::max( + min_alignment_exponent, + def_iter.peek().map(|def| align_to_align_exp(def.decl.get_align().unwrap_or(1))).unwrap_or(0) + ); + alignment_exponent = std::cmp::max(alignment_exponent, next_def_alignment_exponent); + + let align_pad = (1 << next_def_alignment_exponent) - (section_relative_offset % (1 << next_def_alignment_exponent)); + let align_pad = if align_pad == (1 << next_def_alignment_exponent) { + 0 + } else { + align_pad + }; + align_pad_map.insert(def.name.to_string(), align_pad); + + *symbol_offset += align_pad; + section_relative_offset += align_pad; + local_size += align_pad; } let mut section = SectionBuilder::new(sectname.to_string(), segname, local_size) .offset(*offset) @@ -560,7 +559,6 @@ impl SegmentBuilder { let mut symbol_offset = 0; let mut sections = IndexMap::new(); let mut align_pad_map = HashMap::new(); - let mut segment_relative_offset = 0; Self::build_section( symtab, "__text", @@ -574,7 +572,6 @@ impl SegmentBuilder { 4, Some(S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS), &mut align_pad_map, - &mut segment_relative_offset, ); Self::build_section( symtab, @@ -589,7 +586,6 @@ impl SegmentBuilder { 3, None, &mut align_pad_map, - &mut segment_relative_offset, ); Self::build_section( symtab, @@ -604,7 +600,6 @@ impl SegmentBuilder { 0, Some(S_CSTRING_LITERALS), &mut align_pad_map, - &mut segment_relative_offset, ); for (idx, def) in custom_sections.iter().enumerate() { Self::build_custom_section( @@ -784,13 +779,15 @@ impl<'a> Mach<'a> { // write code ////////////////////////////// for code in self.code { + file.write_all(code.data)?; + if let Some(&align_pad) = self.segment.align_pad_map.get(code.name) { for _ in 0..align_pad { - file.write_all(&[0xaa])?; + // `0xcc` generates a debug interrupt on x86. When there is no debugger attached + // this will abort the program. + file.write_all(&[0xcc])?; } } - - file.write_all(code.data)?; } debug!("SEEK: after code: {}", file.seek(Current(0))?); @@ -798,13 +795,13 @@ impl<'a> Mach<'a> { // write data ////////////////////////////// for data in self.data { + file.write_all(data.data)?; + if let Some(&align_pad) = self.segment.align_pad_map.get(data.name) { for _ in 0..align_pad { file.write_all(&[0xaa])?; } } - - file.write_all(data.data)?; } debug!("SEEK: after data: {}", file.seek(Current(0))?); @@ -812,13 +809,13 @@ impl<'a> Mach<'a> { // write cstrings ////////////////////////////// for cstring in self.cstrings { + file.write_all(cstring.data)?; + if let Some(&align_pad) = self.segment.align_pad_map.get(cstring.name) { for _ in 0..align_pad { file.write_all(&[0xaa])?; } } - - file.write_all(cstring.data)?; } debug!("SEEK: after cstrings: {}", file.seek(Current(0))?); @@ -826,13 +823,13 @@ impl<'a> Mach<'a> { // write custom sections ////////////////////////////// for section in self.sections { + file.write_all(section.data)?; + if let Some(&align_pad) = self.segment.align_pad_map.get(section.name) { for _ in 0..align_pad { file.write_all(&[0xaa])?; } } - - file.write_all(section.data)?; } debug!("SEEK: after custom sections: {}", file.seek(Current(0))?); From 518ed0896613fc834fd2d37418a78c75fac378fe Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 24 Aug 2019 17:07:22 +0200 Subject: [PATCH 4/7] Rustfmt --- src/mach.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/mach.rs b/src/mach.rs index 242f73e..f7f00b9 100644 --- a/src/mach.rs +++ b/src/mach.rs @@ -461,11 +461,15 @@ impl SegmentBuilder { let next_def_alignment_exponent = std::cmp::max( min_alignment_exponent, - def_iter.peek().map(|def| align_to_align_exp(def.decl.get_align().unwrap_or(1))).unwrap_or(0) + def_iter + .peek() + .map(|def| align_to_align_exp(def.decl.get_align().unwrap_or(1))) + .unwrap_or(0), ); alignment_exponent = std::cmp::max(alignment_exponent, next_def_alignment_exponent); - let align_pad = (1 << next_def_alignment_exponent) - (section_relative_offset % (1 << next_def_alignment_exponent)); + let align_pad = (1 << next_def_alignment_exponent) + - (section_relative_offset % (1 << next_def_alignment_exponent)); let align_pad = if align_pad == (1 << next_def_alignment_exponent) { 0 } else { From 64e04d12a16c43029875eac209de8c0dee07a71b Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 24 Aug 2019 17:08:27 +0200 Subject: [PATCH 5/7] Use u64 everywhere as alignment type --- src/artifact/decl.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/artifact/decl.rs b/src/artifact/decl.rs index 680ea00..e1d50ff 100644 --- a/src/artifact/decl.rs +++ b/src/artifact/decl.rs @@ -147,8 +147,8 @@ macro_rules! align_methods { () => { /// Build alignment. Size is in bytes. If None, a default is chosen /// in the backend. - pub fn with_align(mut self, align: Option) -> Self { - self.set_align(align.map(|align| align as u64)); + pub fn with_align(mut self, align: Option) -> Self { + self.set_align(align); self } /// Set alignment From c16a669610b3fcef32c1420071606edb53fcdfc6 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 24 Aug 2019 17:08:58 +0200 Subject: [PATCH 6/7] Fix spell error --- src/artifact/decl.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/artifact/decl.rs b/src/artifact/decl.rs index e1d50ff..9b101c5 100644 --- a/src/artifact/decl.rs +++ b/src/artifact/decl.rs @@ -219,7 +219,7 @@ impl DefinedDecl { } } - /// Accessir to determine the minimal alignment + /// Accessor to determine the minimal alignment pub fn get_align(&self) -> Option { match self { DefinedDecl::Data(a) => a.get_align(), From 505a1ea1da1a1860564af93b8a5db41c7f2af6c9 Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sun, 25 Aug 2019 16:32:05 +0200 Subject: [PATCH 7/7] Explain 0xaa --- src/mach.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mach.rs b/src/mach.rs index f7f00b9..1901437 100644 --- a/src/mach.rs +++ b/src/mach.rs @@ -803,6 +803,9 @@ impl<'a> Mach<'a> { if let Some(&align_pad) = self.segment.align_pad_map.get(data.name) { for _ in 0..align_pad { + // Exact padding value doesn't matter. Not using zero to prevent confusion + // with a zero pointer when the final executable accidentially reads past + // the end of a data object. file.write_all(&[0xaa])?; } } @@ -817,6 +820,7 @@ impl<'a> Mach<'a> { if let Some(&align_pad) = self.segment.align_pad_map.get(cstring.name) { for _ in 0..align_pad { + // See comment above for explanation of 0xaa file.write_all(&[0xaa])?; } } @@ -831,6 +835,7 @@ impl<'a> Mach<'a> { if let Some(&align_pad) = self.segment.align_pad_map.get(section.name) { for _ in 0..align_pad { + // See comment above for explanation of 0xaa file.write_all(&[0xaa])?; } }