Merge branch 'master' into ref/unreal4

* master: fix(minidump): Do not emit default CFI for the .ra register (#157) ref(example): Output debug ids in breakpad format in dump_cfi feat(example): Output a breakpad module header in dump_cfi build: Switch to crates releases fix(symcache): Allow functions with more than 65k lines (#155)
getsentry · Jul 10, 2019 · 24ef277 · 24ef277
2 parents f7ce554 + 51f0516
commit 24ef277
Show file tree

Hide file tree

Showing 12 changed files with 255 additions and 211 deletions.
diff --git a/.craft.yml b/.craft.yml
@@ -4,7 +4,7 @@ github:
   repo: symbolic
 preReleaseCommand: bash scripts/bump-version
 targets:
-  # - name: crates
+  - name: crates
   - name: pypi
   - name: github
 changelogPolicy: simple
diff --git a/cabi/Cargo.lock b/cabi/Cargo.lock
diff --git a/debuginfo/Cargo.toml b/debuginfo/Cargo.toml
@@ -25,9 +25,9 @@ failure = "0.1.5"
 fallible-iterator = "0.2.0"
 flate2 = { version = "1.0.7", features = ["rust_backend"], default-features = false }
 gimli = { version = "0.18.0", features = ["read", "std"], default-features = false }
-goblin = { git = "https://github.com/jan-auer/goblin", rev = "dd199e4b9027c9ca4389d5b066f5e264ef507e43" }
+goblin = "0.0.23"
 lazycell = "1.2.1"
-pdb =  { git = "https://github.com/jan-auer/pdb", rev = "da040f145d98c1d86da7cda3894fbc4dae1ba796" }
+pdb =  "0.5.0"
 parking_lot = "0.8.0"
 pest = "2.1.1"
 pest_derive = "2.1.0"

diff --git a/debuginfo/src/base.rs b/debuginfo/src/base.rs
@@ -467,6 +467,15 @@ pub struct Function<'data> {
     pub inline: bool,
 }
 
+impl Function<'_> {
+    /// End address of the entire function body, including inlined functions.
+    ///
+    /// This address points at the first instruction after the function body.
+    pub fn end_address(&self) -> u64 {
+        self.address + self.size
+    }
+}
+
 impl fmt::Debug for Function<'_> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         f.debug_struct("Function")

diff --git a/examples/dump_cfi.rs b/examples/dump_cfi.rs
@@ -21,6 +21,15 @@ fn dump_cfi<P: AsRef<Path>>(path: P) -> Result<(), Error> {
     let buffer = ByteView::open(path)?;
     let object = Object::parse(&buffer)?;
 
+    println!(
+        "MODULE unknown {} {} {}",
+        object.arch(),
+        object.debug_id().breakpad(),
+        path.file_name()
+            .map(|s| s.to_string_lossy())
+            .unwrap_or_default(),
+    );
+
     AsciiCfiWriter::new(std::io::stdout()).process(&object)?;
 
     Ok(())

diff --git a/minidump/src/cfi.rs b/minidump/src/cfi.rs
@@ -357,15 +357,6 @@ impl<W: Write> AsciiCfiWriter<W> {
                     }
                 }
 
-                // Breakpad STACK CFI records must provide a .ra rule, but DWARF CFI may not
-                // establish any rule for .ra if the return address column is an ordinary register,
-                // and that register holds the return address on entry to the function. So establish
-                // a .ra rule citing the return address register.
-                if !rule_cache.contains_key(&ra) {
-                    let rule = RegisterRule::SameValue::<R>;
-                    written |= Self::write_register_rule(&mut line, info.arch, ra, &rule, ra)?;
-                }
-
                 if written {
                     self.inner
                         .write_all(&line)

diff --git a/minidump/tests/snapshots/test_cfi__cfi_elf.snap b/minidump/tests/snapshots/test_cfi__cfi_elf.snap
@@ -1,10 +1,10 @@
 ---
-created: "2019-04-24T14:55:14.510136Z"
-creator: insta@0.7.4
+created: "2019-07-05T18:52:40.332785Z"
+creator: insta@0.8.1
 source: minidump/tests/test_cfi.rs
 expression: cfi
 ---
-STACK CFI INIT 1dc0 2a .cfa: $rsp 8 + .ra: $rip
+STACK CFI INIT 1dc0 2a .cfa: $rsp 8 +
 STACK CFI INIT 1580 370 .cfa: $rsp 16 + .ra: .cfa -8 + ^
 STACK CFI 1586 .cfa: $rsp 24 +
 STACK CFI INIT 1ec0 3b .cfa: $rsp 8 + .ra: .cfa -8 + ^

diff --git a/py/tests/res/minidump/crash_linux.sym b/py/tests/res/minidump/crash_linux.sym
@@ -1,4 +1,4 @@
-STACK CFI INIT 1f10 2a .cfa: $rsp 8 + .ra: $rip
+STACK CFI INIT 1f10 2a .cfa: $rsp 8 +
 STACK CFI INIT 1a90 470 .cfa: $rsp 16 + .ra: .cfa -8 + ^
 STACK CFI 1a96 .cfa: $rsp 24 +
 STACK CFI INIT 21d4 3a .cfa: $rsp 8 + .ra: .cfa -8 + ^

diff --git a/symcache/src/format.rs b/symcache/src/format.rs
@@ -15,7 +15,7 @@ use crate::error::{SymCacheError, SymCacheErrorKind};
 pub const SYMCACHE_MAGIC: [u8; 4] = *b"SYMC";
 
 /// The latest version of the file format.
-pub const SYMCACHE_VERSION: u32 = 3;
+pub const SYMCACHE_VERSION: u32 = 4;
 
 /// Loads binary data from a segment.
 pub(crate) fn get_slice(data: &[u8], offset: usize, len: usize) -> Result<&[u8], io::Error> {
@@ -55,6 +55,7 @@ pub struct Seg<T, L = u32> {
 
 impl<T, L> Seg<T, L> {
     /// Creates a segment with specified offset and length.
+    #[inline]
     pub fn new(offset: u32, len: L) -> Seg<T, L> {
         Seg {
             offset,

diff --git a/symcache/src/writer.rs b/symcache/src/writer.rs
@@ -4,9 +4,10 @@ use std::io::{self, Seek, Write};
 
 use failure::{Fail, ResultExt};
 use fnv::{FnvHashMap, FnvHashSet};
+use num::FromPrimitive;
 
 use symbolic_common::{Arch, DebugId, Language};
-use symbolic_debuginfo::{DebugSession, FileInfo, Function, ObjectLike, Symbol};
+use symbolic_debuginfo::{DebugSession, FileInfo, Function, LineInfo, ObjectLike, Symbol};
 
 use crate::error::{SymCacheError, SymCacheErrorKind, ValueKind};
 use crate::format;
@@ -16,21 +17,37 @@ fn is_empty_function(function: &Function<'_>) -> bool {
     function.lines.is_empty() && function.inlinees.is_empty()
 }
 
+/// Performs a check whether this line has already been written in the scope of this function.
+fn is_redundant_line(line: &LineInfo<'_>, line_cache: &mut LineCache) -> bool {
+    !line_cache.insert((line.address, line.line))
+}
+
 /// Recursively cleans a tree of functions that does not cover any lines.
 ///
-/// This first recursively cleans all inlinees and then removes those that have become empty. Also,
-/// it ensures that each function's start address includes the earliest address of any of its
-/// inlinees. This is due to a requirement that a function occurs before all of its inlinees.
-fn clean_function(function: &mut Function<'_>) {
+///  - Removes all redundant line records (see `is_redundant_line`)
+///  - Removes all empty functions (see `is_empty_function`)
+///  - Ensures that the function's address range covers all of its inlinees
+fn clean_function(function: &mut Function<'_>, line_cache: &mut LineCache) {
+    let mut inlinee_lines = LineCache::default();
+
     for inlinee in &mut function.inlinees {
-        clean_function(inlinee);
+        clean_function(inlinee, &mut inlinee_lines);
 
         let start_shift = function.address.saturating_sub(inlinee.address);
         function.size += start_shift;
         function.address -= start_shift;
+
+        let end_shift =
+            (inlinee.address + inlinee.size).saturating_sub(function.address + function.size);
+        function.size += end_shift;
     }
 
     function.inlinees.retain(|f| !is_empty_function(f));
+    function
+        .lines
+        .retain(|l| !is_redundant_line(l, &mut inlinee_lines));
+
+    line_cache.extend(inlinee_lines);
 }
 
 /// Low-level helper that writes segments and keeps track of the current offset.
@@ -162,6 +179,9 @@ struct FuncHandle {
     pub record: format::FuncRecord,
 }
 
+/// A cache for line record deduplication across inline functions.
+type LineCache = FnvHashSet<(u64, u64)>;
+
 /// A high level writer that can construct SymCaches.
 ///
 /// When using this writer directly, ensure to call [`finish`] at the end, so that all segments are
@@ -309,12 +329,12 @@ where
         // If we encounter a function without any instructions we just skip it.  This saves memory
         // and since we only care about instructions where we can actually crash this is a
         // reasonable optimization.
-        clean_function(&mut function);
+        clean_function(&mut function, &mut LineCache::default());
         if is_empty_function(&function) {
             return Ok(());
         }
 
-        self.insert_function(&function, FuncRef::none(), &mut FnvHashSet::default())
+        self.insert_function(&function, FuncRef::none())
     }
 
     /// Persists all open segments to the writer and fixes up the header.
@@ -399,53 +419,16 @@ where
         Ok(index)
     }
 
-    fn insert_function(
+    fn insert_lines(
         &mut self,
-        function: &Function<'_>,
-        parent_ref: FuncRef,
-        line_cache: &mut FnvHashSet<(u64, u64)>,
-    ) -> Result<(), SymCacheError> {
-        let address = function.address;
-        let language = function.name.language();
-        let symbol_id = self.insert_symbol(function.name.as_str().into())?;
-        let comp_dir = self.write_path(function.compilation_dir)?;
-
-        let lang = if language as u32 > 0xff {
-            return Err(SymCacheErrorKind::ValueTooLarge(ValueKind::Language).into());
-        } else {
-            language as u8
-        };
-
-        let record = format::FuncRecord {
-            addr_low: (address & 0xffff_ffff) as u32,
-            addr_high: ((address >> 32) & 0xffff) as u16,
-            // XXX: we have not seen this yet, but in theory this should be
-            // stored as multiple function records.
-            len: std::cmp::min(function.size, 0xffff) as u16,
-            symbol_id_low: (symbol_id & 0xffff) as u16,
-            symbol_id_high: ((symbol_id >> 16) & 0xff) as u8,
-            line_records: format::Seg::default(),
-            parent_offset: !0,
-            comp_dir,
-            lang,
-        };
-
-        // Push the record first, then recurse into inline functions and finally push line records.
-        // This ensures that functions are pushed in the right order, while address rejection can
-        // prune duplicate line entries.
-        let function_ref = self.push_function(record, parent_ref)?;
-        for inlinee in &function.inlinees {
-            self.insert_function(inlinee, function_ref, line_cache)?;
-        }
-
-        let mut last_address = address;
+        lines: &mut std::iter::Peekable<std::slice::Iter<LineInfo<'_>>>,
+        start_address: u64,
+        end_address: u64,
+    ) -> Result<(Vec<format::LineRecord>, u64), SymCacheError> {
         let mut line_segment = vec![];
-        for line in &function.lines {
-            // Reject this line if it has been covered by one of the inlinees.
-            if !line_cache.insert((line.address, line.line)) {
-                continue;
-            }
+        let mut last_address = start_address;
 
+        while let Some(line) = lines.peek() {
             let file_id = self.insert_file(&line.file)?;
 
             // We have seen that swift can generate line records that lie outside of the function
@@ -463,15 +446,81 @@ where
                     line: std::cmp::min(line.line, std::u16::MAX.into()) as u16,
                 };
                 last_address += u64::from(line_record.addr_off);
+
+                // Check if we can still add a line record to this function without exceeding limits
+                // of the physical format. Otherwise, do an early exit and let the caller iterate.
+                let should_split_function = last_address - start_address > std::u16::MAX.into()
+                    || line_segment.len() >= std::u16::MAX.into();
+
+                if should_split_function {
+                    return Ok((line_segment, last_address));
+                }
+
                 line_segment.push(line_record);
                 diff -= 0xff;
             }
+
+            lines.next();
         }
 
-        if !line_segment.is_empty() {
-            let record = &mut self.functions[function_ref.index as usize].record;
-            record.line_records = self.writer.write_segment(&line_segment, ValueKind::Line)?;
-            self.header.has_line_records = 1;
+        Ok((line_segment, end_address))
+    }
+
+    fn insert_function(
+        &mut self,
+        function: &Function<'_>,
+        parent_ref: FuncRef,
+    ) -> Result<(), SymCacheError> {
+        // There are two conditions under which a function record needs to be split. When a function
+        // is split, its inline functions are also assigned to the according part:
+        //  1. Its address range exceeds 65k bytes. This makes it too large for the `len` field in
+        //     the function record.
+        //  2. There are more than 65k line records. This is larger than the index used for the line
+        //     segment.
+
+        let language = function.name.language();
+        let symbol_id = self.insert_symbol(function.name.as_str().into())?;
+        let comp_dir = self.write_path(function.compilation_dir)?;
+        let lang = u8::from_u32(language as u32)
+            .ok_or_else(|| SymCacheErrorKind::ValueTooLarge(ValueKind::Language))?;
+
+        let mut start_address = function.address;
+        let mut lines = function.lines.iter().peekable();
+
+        while start_address < function.end_address() {
+            // Insert lines for a segment of the function. This will return the list of line
+            // records, and the end of the segment that was written.
+            //  - If all line records were written, the segment ends with the function.
+            //  - Otherwise, it is the address of the subsequent line record that could not be
+            //    written anymore. In the next iteration, output will start with this line record.
+            let (line_segment, end_address) =
+                self.insert_lines(&mut lines, start_address, function.end_address())?;
+
+            let line_records = self.writer.write_segment(&line_segment, ValueKind::Line)?;
+            if !line_segment.is_empty() {
+                self.header.has_line_records = 1;
+            }
+
+            let record = format::FuncRecord {
+                addr_low: (start_address & 0xffff_ffff) as u32,
+                addr_high: ((start_address >> 32) & 0xffff) as u16,
+                len: (end_address - start_address) as u16,
+                symbol_id_low: (symbol_id & 0xffff) as u16,
+                symbol_id_high: ((symbol_id >> 16) & 0xff) as u8,
+                parent_offset: !0,
+                line_records,
+                comp_dir,
+                lang,
+            };
+
+            let function_ref = self.push_function(record, parent_ref)?;
+            for inlinee in &function.inlinees {
+                if inlinee.address >= start_address && inlinee.end_address() <= end_address {
+                    self.insert_function(inlinee, function_ref)?;
+                }
+            }
+
+            start_address = end_address;
         }
 
         Ok(())

diff --git a/symcache/tests/snapshots/test_writer__functions_linux.snap b/symcache/tests/snapshots/test_writer__functions_linux.snap
@@ -1,6 +1,6 @@
 ---
-created: "2019-04-24T15:06:44.730741Z"
-creator: insta@0.7.4
+created: "2019-07-02T08:47:28.746328Z"
+creator: insta@0.8.1
 source: symcache/tests/test_writer.rs
 expression: FunctionsDebug(&symcache)
 ---

diff --git a/symcache/tests/snapshots/test_writer__functions_macos.snap b/symcache/tests/snapshots/test_writer__functions_macos.snap
@@ -1,6 +1,6 @@
 ---
-created: "2019-04-24T15:43:42.709181Z"
-creator: insta@0.7.4
+created: "2019-07-02T08:47:28.591628Z"
+creator: insta@0.8.1
 source: symcache/tests/test_writer.rs
 expression: FunctionsDebug(&symcache)
 ---