Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(symcache): Allow functions with more than 65k lines #155

Merged
merged 3 commits into from
Jul 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions debuginfo/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,15 @@ pub struct Function<'data> {
pub inline: bool,
}

impl Function<'_> {
/// End address of the entire function body, including inlined functions.
///
/// This address points at the first instruction after the function body.
pub fn end_address(&self) -> u64 {
self.address + self.size
}
}

impl fmt::Debug for Function<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("Function")
Expand Down
3 changes: 2 additions & 1 deletion symcache/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::error::{SymCacheError, SymCacheErrorKind};
pub const SYMCACHE_MAGIC: [u8; 4] = *b"SYMC";

/// The latest version of the file format.
pub const SYMCACHE_VERSION: u32 = 3;
pub const SYMCACHE_VERSION: u32 = 4;

/// Loads binary data from a segment.
pub(crate) fn get_slice(data: &[u8], offset: usize, len: usize) -> Result<&[u8], io::Error> {
Expand Down Expand Up @@ -55,6 +55,7 @@ pub struct Seg<T, L = u32> {

impl<T, L> Seg<T, L> {
/// Creates a segment with specified offset and length.
#[inline]
pub fn new(offset: u32, len: L) -> Seg<T, L> {
Seg {
offset,
Expand Down
161 changes: 105 additions & 56 deletions symcache/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ use std::io::{self, Seek, Write};

use failure::{Fail, ResultExt};
use fnv::{FnvHashMap, FnvHashSet};
use num::FromPrimitive;

use symbolic_common::{Arch, DebugId, Language};
use symbolic_debuginfo::{DebugSession, FileInfo, Function, ObjectLike, Symbol};
use symbolic_debuginfo::{DebugSession, FileInfo, Function, LineInfo, ObjectLike, Symbol};

use crate::error::{SymCacheError, SymCacheErrorKind, ValueKind};
use crate::format;
Expand All @@ -16,21 +17,37 @@ fn is_empty_function(function: &Function<'_>) -> bool {
function.lines.is_empty() && function.inlinees.is_empty()
}

/// Performs a check whether this line has already been written in the scope of this function.
fn is_redundant_line(line: &LineInfo<'_>, line_cache: &mut LineCache) -> bool {
!line_cache.insert((line.address, line.line))
}

/// Recursively cleans a tree of functions that does not cover any lines.
///
/// This first recursively cleans all inlinees and then removes those that have become empty. Also,
/// it ensures that each function's start address includes the earliest address of any of its
/// inlinees. This is due to a requirement that a function occurs before all of its inlinees.
fn clean_function(function: &mut Function<'_>) {
/// - Removes all redundant line records (see `is_redundant_line`)
/// - Removes all empty functions (see `is_empty_function`)
/// - Ensures that the function's address range covers all of its inlinees
fn clean_function(function: &mut Function<'_>, line_cache: &mut LineCache) {
let mut inlinee_lines = LineCache::default();

for inlinee in &mut function.inlinees {
clean_function(inlinee);
clean_function(inlinee, &mut inlinee_lines);

let start_shift = function.address.saturating_sub(inlinee.address);
function.size += start_shift;
function.address -= start_shift;

let end_shift =
(inlinee.address + inlinee.size).saturating_sub(function.address + function.size);
function.size += end_shift;
}

function.inlinees.retain(|f| !is_empty_function(f));
function
.lines
.retain(|l| !is_redundant_line(l, &mut inlinee_lines));

line_cache.extend(inlinee_lines);
}

/// Low-level helper that writes segments and keeps track of the current offset.
Expand Down Expand Up @@ -162,6 +179,9 @@ struct FuncHandle {
pub record: format::FuncRecord,
}

/// A cache for line record deduplication across inline functions.
type LineCache = FnvHashSet<(u64, u64)>;

/// A high level writer that can construct SymCaches.
///
/// When using this writer directly, ensure to call [`finish`] at the end, so that all segments are
Expand Down Expand Up @@ -309,12 +329,12 @@ where
// If we encounter a function without any instructions we just skip it. This saves memory
// and since we only care about instructions where we can actually crash this is a
// reasonable optimization.
clean_function(&mut function);
clean_function(&mut function, &mut LineCache::default());
if is_empty_function(&function) {
return Ok(());
}

self.insert_function(&function, FuncRef::none(), &mut FnvHashSet::default())
self.insert_function(&function, FuncRef::none())
}

/// Persists all open segments to the writer and fixes up the header.
Expand Down Expand Up @@ -399,53 +419,16 @@ where
Ok(index)
}

fn insert_function(
fn insert_lines(
&mut self,
function: &Function<'_>,
parent_ref: FuncRef,
line_cache: &mut FnvHashSet<(u64, u64)>,
) -> Result<(), SymCacheError> {
let address = function.address;
let language = function.name.language();
let symbol_id = self.insert_symbol(function.name.as_str().into())?;
let comp_dir = self.write_path(function.compilation_dir)?;

let lang = if language as u32 > 0xff {
return Err(SymCacheErrorKind::ValueTooLarge(ValueKind::Language).into());
} else {
language as u8
};

let record = format::FuncRecord {
addr_low: (address & 0xffff_ffff) as u32,
addr_high: ((address >> 32) & 0xffff) as u16,
// XXX: we have not seen this yet, but in theory this should be
// stored as multiple function records.
len: std::cmp::min(function.size, 0xffff) as u16,
symbol_id_low: (symbol_id & 0xffff) as u16,
symbol_id_high: ((symbol_id >> 16) & 0xff) as u8,
line_records: format::Seg::default(),
parent_offset: !0,
comp_dir,
lang,
};

// Push the record first, then recurse into inline functions and finally push line records.
// This ensures that functions are pushed in the right order, while address rejection can
// prune duplicate line entries.
let function_ref = self.push_function(record, parent_ref)?;
for inlinee in &function.inlinees {
self.insert_function(inlinee, function_ref, line_cache)?;
}

let mut last_address = address;
lines: &mut std::iter::Peekable<std::slice::Iter<LineInfo<'_>>>,
start_address: u64,
end_address: u64,
) -> Result<(Vec<format::LineRecord>, u64), SymCacheError> {
let mut line_segment = vec![];
for line in &function.lines {
// Reject this line if it has been covered by one of the inlinees.
if !line_cache.insert((line.address, line.line)) {
continue;
}
let mut last_address = start_address;

while let Some(line) = lines.peek() {
let file_id = self.insert_file(&line.file)?;

// We have seen that swift can generate line records that lie outside of the function
Expand All @@ -463,15 +446,81 @@ where
line: std::cmp::min(line.line, std::u16::MAX.into()) as u16,
};
last_address += u64::from(line_record.addr_off);

// Check if we can still add a line record to this function without exceeding limits
// of the physical format. Otherwise, do an early exit and let the caller iterate.
let should_split_function = last_address - start_address > std::u16::MAX.into()
|| line_segment.len() >= std::u16::MAX.into();

if should_split_function {
return Ok((line_segment, last_address));
}

line_segment.push(line_record);
diff -= 0xff;
}

lines.next();
}

if !line_segment.is_empty() {
let record = &mut self.functions[function_ref.index as usize].record;
record.line_records = self.writer.write_segment(&line_segment, ValueKind::Line)?;
self.header.has_line_records = 1;
Ok((line_segment, end_address))
}

fn insert_function(
&mut self,
function: &Function<'_>,
parent_ref: FuncRef,
) -> Result<(), SymCacheError> {
// There are two conditions under which a function record needs to be split. When a function
// is split, its inline functions are also assigned to the according part:
// 1. Its address range exceeds 65k bytes. This makes it too large for the `len` field in
// the function record.
// 2. There are more than 65k line records. This is larger than the index used for the line
// segment.

let language = function.name.language();
let symbol_id = self.insert_symbol(function.name.as_str().into())?;
let comp_dir = self.write_path(function.compilation_dir)?;
let lang = u8::from_u32(language as u32)
.ok_or_else(|| SymCacheErrorKind::ValueTooLarge(ValueKind::Language))?;

let mut start_address = function.address;
let mut lines = function.lines.iter().peekable();

while start_address < function.end_address() {
// Insert lines for a segment of the function. This will return the list of line
// records, and the end of the segment that was written.
// - If all line records were written, the segment ends with the function.
// - Otherwise, it is the address of the subsequent line record that could not be
// written anymore. In the next iteration, output will start with this line record.
let (line_segment, end_address) =
self.insert_lines(&mut lines, start_address, function.end_address())?;

let line_records = self.writer.write_segment(&line_segment, ValueKind::Line)?;
if !line_segment.is_empty() {
self.header.has_line_records = 1;
}

let record = format::FuncRecord {
addr_low: (start_address & 0xffff_ffff) as u32,
addr_high: ((start_address >> 32) & 0xffff) as u16,
len: (end_address - start_address) as u16,
symbol_id_low: (symbol_id & 0xffff) as u16,
symbol_id_high: ((symbol_id >> 16) & 0xff) as u8,
parent_offset: !0,
line_records,
comp_dir,
lang,
};

let function_ref = self.push_function(record, parent_ref)?;
for inlinee in &function.inlinees {
if inlinee.address >= start_address && inlinee.end_address() <= end_address {
self.insert_function(inlinee, function_ref)?;
}
}

start_address = end_address;
}

Ok(())
Expand Down
4 changes: 2 additions & 2 deletions symcache/tests/snapshots/test_writer__functions_linux.snap
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
created: "2019-04-24T15:06:44.730741Z"
creator: insta@0.7.4
created: "2019-07-02T08:47:28.746328Z"
creator: insta@0.8.1
source: symcache/tests/test_writer.rs
expression: FunctionsDebug(&symcache)
---
Expand Down
4 changes: 2 additions & 2 deletions symcache/tests/snapshots/test_writer__functions_macos.snap
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
created: "2019-04-24T15:43:42.709181Z"
creator: insta@0.7.4
created: "2019-07-02T08:47:28.591628Z"
creator: insta@0.8.1
source: symcache/tests/test_writer.rs
expression: FunctionsDebug(&symcache)
---
Expand Down