Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ref: Use watto for ppdb and symcache parsing #670

Merged
merged 17 commits into from
Sep 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Changelog

## Unreleased

**Features**:

- The new version 8 of the symcache format saves strings slightly more compactly. Reading of version 7 is still supported. ([#670](https://github.com/getsentry/symbolic/pull/670))

**Internal**:

- Use the `watto` library for de/serialization of symcache and ppdbcache. ([#670](https://github.com/getsentry/symbolic/pull/670))

## 9.1.4

**Fixes**:
Expand Down
4 changes: 3 additions & 1 deletion symbolic-cabi/src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,9 @@ impl SymbolicErrorCode {
if let Some(error) = error.downcast_ref::<Error>() {
return match error.kind() {
ErrorKind::WrongFormat => SymbolicErrorCode::SymCacheErrorBadFileMagic,
ErrorKind::HeaderTooSmall => SymbolicErrorCode::SymCacheErrorBadFileHeader,
ErrorKind::HeaderTooSmall | ErrorKind::InvalidHeader => {
SymbolicErrorCode::SymCacheErrorBadFileHeader
}
ErrorKind::WrongVersion => SymbolicErrorCode::SymCacheErrorUnsupportedVersion,
ErrorKind::BadDebugFile => SymbolicErrorCode::SymCacheErrorBadDebugFile,
_ => SymbolicErrorCode::SymCacheErrorUnknown,
Expand Down
3 changes: 1 addition & 2 deletions symbolic-ppdb/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ all-features = true

[dependencies]
indexmap = "1.9.1"
leb128 = "0.2.5"
symbolic-common = { version = "9.1.4", path = "../symbolic-common" }
watto = { version = "0.1.0", features = ["writer", "strings"] }
thiserror = "1.0.31"
uuid = "1.0.0"
zerocopy = "0.6.1"
7 changes: 1 addition & 6 deletions symbolic-ppdb/src/cache/lookup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,6 @@ impl<'data> PortablePdbCache<'data> {

/// Resolves a string reference to the pointed-to `&str` data.
fn get_string(&self, offset: u32) -> Option<&'data str> {
let reader = &mut self.string_bytes.get(offset as usize..)?;
let len = leb128::read::unsigned(reader).ok()? as usize;

let bytes = reader.get(..len)?;

std::str::from_utf8(bytes).ok()
watto::StringTable::read(self.string_bytes, offset as usize).ok()
}
}
53 changes: 18 additions & 35 deletions symbolic-ppdb/src/cache/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ pub(crate) mod writer;

use symbolic_common::DebugId;
use thiserror::Error;
use zerocopy::LayoutVerified;
use watto::{align_to, Pod};

const PPDBCACHE_VERSION: u32 = 1;

Expand Down Expand Up @@ -148,10 +148,8 @@ pub struct PortablePdbCache<'data> {
impl<'data> PortablePdbCache<'data> {
/// Parses the given buffer into a `PortablePdbCache`.
pub fn parse(buf: &'data [u8]) -> Result<Self, CacheError> {
let (lv, rest) = LayoutVerified::<_, raw::Header>::new_from_prefix(buf)
.ok_or(CacheErrorKind::InvalidHeader)?;

let header = lv.into_ref();
let (header, rest) =
raw::Header::ref_from_prefix(buf).ok_or(CacheErrorKind::InvalidHeader)?;

if header.magic == raw::PPDBCACHE_MAGIC_FLIPPED {
return Err(CacheErrorKind::WrongEndianness.into());
Expand All @@ -164,34 +162,26 @@ impl<'data> PortablePdbCache<'data> {
return Err(CacheErrorKind::WrongVersion(header.version).into());
}

let rest = align_buf(rest);
let (_, rest) = align_to(rest, 8).ok_or(CacheErrorKind::InvalidFiles)?;

let (lv, rest) = LayoutVerified::<_, [raw::File]>::new_slice_from_prefix(
rest,
header.num_files as usize,
)
.ok_or(CacheErrorKind::InvalidFiles)?;
let (files, rest) = raw::File::slice_from_prefix(rest, header.num_files as usize)
.ok_or(CacheErrorKind::InvalidFiles)?;

let files = lv.into_slice();
let rest = align_buf(rest);
let (_, rest) = align_to(rest, 8).ok_or(CacheErrorKind::InvalidSourceLocations)?;

let (lv, rest) = LayoutVerified::<_, [raw::SourceLocation]>::new_slice_from_prefix(
rest,
header.num_ranges as usize,
)
.ok_or(CacheErrorKind::InvalidSourceLocations)?;
let (source_locations, rest) =
raw::SourceLocation::slice_from_prefix(rest, header.num_ranges as usize)
.ok_or(CacheErrorKind::InvalidSourceLocations)?;

let source_locations = lv.into_slice();
let rest = align_buf(rest);
let (_, rest) = align_to(rest, 8).ok_or(CacheErrorKind::InvalidRanges)?;

let (lv, rest) = LayoutVerified::<_, [raw::Range]>::new_slice_from_prefix(
rest,
header.num_ranges as usize,
)
.ok_or(CacheErrorKind::InvalidRanges)?;
let (ranges, rest) = raw::Range::slice_from_prefix(rest, header.num_ranges as usize)
.ok_or(CacheErrorKind::InvalidRanges)?;

let ranges = lv.into_slice();
let rest = align_buf(rest);
let (_, rest) = align_to(rest, 8).ok_or(CacheErrorKind::UnexpectedStringBytes {
expected: header.string_bytes as usize,
found: 0,
})?;

if rest.len() < header.string_bytes as usize {
return Err(CacheErrorKind::UnexpectedStringBytes {
Expand All @@ -212,9 +202,7 @@ impl<'data> PortablePdbCache<'data> {

/// Returns the [`DebugId`] of this portable PDB cache.
pub fn debug_id(&self) -> DebugId {
let (guid, age) = self.header.pdb_id.split_at(16);
let age = u32::from_ne_bytes(age.try_into().unwrap());
DebugId::from_guid_age(guid, age).unwrap()
self.header.pdb_id
}
}

Expand All @@ -229,8 +217,3 @@ impl<'data> std::fmt::Debug for PortablePdbCache<'data> {
.finish()
}
}

fn align_buf(buf: &[u8]) -> &[u8] {
let offset = buf.as_ptr().align_offset(8);
buf.get(offset..).unwrap_or(&[])
}
40 changes: 34 additions & 6 deletions symbolic-ppdb/src/cache/raw.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use zerocopy::{AsBytes, FromBytes};
use symbolic_common::DebugId;
use watto::Pod;

/// The magic file preamble as individual bytes.
const PPDBCACHE_MAGIC_BYTES: [u8; 4] = *b"PDBC";
Expand All @@ -11,15 +12,15 @@ pub(crate) const PPDBCACHE_MAGIC: u32 = u32::from_le_bytes(PPDBCACHE_MAGIC_BYTES
pub(crate) const PPDBCACHE_MAGIC_FLIPPED: u32 = PPDBCACHE_MAGIC.swap_bytes();

/// The header of a PortablePdbCache file.
#[derive(Debug, Clone, FromBytes, AsBytes)]
#[derive(Debug, Clone)]
#[repr(C)]
pub(crate) struct Header {
/// The file magic representing the file format and endianness.
pub(crate) magic: u32,
/// The PortablePdbCache format version.
pub(crate) version: u32,
/// A byte sequence uniquely representing the debugging metadata blob content.
pub(crate) pdb_id: [u8; 20],
pub(crate) pdb_id: DebugId,
/// The number of files contained in the cache file.
pub(crate) num_files: u32,
/// The number of ranges/source locations contained in the cache file.
Expand All @@ -32,7 +33,7 @@ pub(crate) struct Header {
}

/// A location in a source file, comprising a line and the index of a file.
#[derive(Debug, Clone, Copy, FromBytes, AsBytes)]
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub(crate) struct SourceLocation {
pub(crate) line: u32,
Expand All @@ -43,19 +44,46 @@ pub(crate) struct SourceLocation {
///
/// Only the starting IL offset is saved; the ending offset is given implicitly by
/// the starting offset of the next range (if any).
#[derive(Debug, Clone, Copy, FromBytes, AsBytes, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(C)]
pub(crate) struct Range {
pub(crate) func_idx: u32,
pub(crate) il_offset: u32,
}

/// Serialized file in the cache.
#[derive(Debug, Clone, Copy, FromBytes, AsBytes, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(C)]
pub(crate) struct File {
/// The file path (reference to a [`String`]).
pub(crate) name_offset: u32,
/// The file's source language.
pub(crate) lang: u32,
}

unsafe impl Pod for Header {}
unsafe impl Pod for SourceLocation {}
unsafe impl Pod for Range {}
unsafe impl Pod for File {}

#[cfg(test)]
mod tests {
use std::mem;

use super::*;

#[test]
fn test_sizeof() {
assert_eq!(mem::size_of::<Header>(), 68);
assert_eq!(mem::align_of::<Header>(), 4);

assert_eq!(mem::size_of::<File>(), 8);
assert_eq!(mem::align_of::<File>(), 4);

assert_eq!(mem::size_of::<SourceLocation>(), 8);
assert_eq!(mem::align_of::<SourceLocation>(), 4);

assert_eq!(mem::size_of::<Range>(), 8);
assert_eq!(mem::align_of::<Range>(), 4);
}
}
102 changes: 19 additions & 83 deletions symbolic-ppdb/src/cache/writer.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::collections::{BTreeMap, HashMap};
use std::collections::BTreeMap;
use std::io::Write;

use indexmap::IndexSet;
use symbolic_common::Language;
use zerocopy::AsBytes;
use symbolic_common::{DebugId, Language};
use watto::{Pod, StringTable};

use super::{raw, CacheError};
use crate::PortablePdb;
Expand All @@ -15,18 +15,15 @@ use crate::PortablePdb;
#[derive(Debug, Default)]
pub struct PortablePdbCacheConverter {
/// A byte sequence uniquely representing the debugging metadata blob content.
pdb_id: [u8; 20],
pdb_id: DebugId,
/// The set of all [`raw::File`]s that have been added to this `Converter`.
files: IndexSet<raw::File>,
/// The concatenation of all strings that have been added to this `Converter`.
string_bytes: Vec<u8>,
/// A map from [`String`]s that have been added to this `Converter` to their offsets in the `string_bytes` field.
strings: HashMap<String, u32>,
/// A map from [`raw::Range`]s to the [`raw::SourceLocation`]s they correspond to.
///
/// Only the starting address of a range is saved, the end address is given implicitly
/// by the start address of the next range.
ranges: BTreeMap<raw::Range, raw::SourceLocation>,
string_table: StringTable,
}

impl PortablePdbCacheConverter {
Expand Down Expand Up @@ -69,11 +66,11 @@ impl PortablePdbCacheConverter {
///
/// This writes the PortablePdbCache binary format into the given [`Write`].
pub fn serialize<W: Write>(self, writer: &mut W) -> std::io::Result<()> {
let mut writer = WriteWrapper::new(writer);
let mut writer = watto::Writer::new(writer);

let num_ranges = self.ranges.len() as u32;
let num_files = self.files.len() as u32;
let string_bytes = self.string_bytes.len() as u32;
let string_bytes = self.string_table.into_bytes();

let header = raw::Header {
magic: raw::PPDBCACHE_MAGIC,
Expand All @@ -83,105 +80,44 @@ impl PortablePdbCacheConverter {

num_files,
num_ranges,
string_bytes,
string_bytes: string_bytes.len() as u32,
_reserved: [0; 16],
};

writer.write(header.as_bytes())?;
writer.align()?;
writer.write_all(header.as_bytes())?;
writer.align_to(8)?;

for file in self.files.into_iter() {
writer.write(file.as_bytes())?;
writer.write_all(file.as_bytes())?;
}
writer.align()?;
writer.align_to(8)?;

for sl in self.ranges.values() {
writer.write(sl.as_bytes())?;
writer.write_all(sl.as_bytes())?;
}
writer.align()?;
writer.align_to(8)?;

for r in self.ranges.keys() {
writer.write(r.as_bytes())?;
writer.write_all(r.as_bytes())?;
}
writer.align()?;
writer.align_to(8)?;

writer.write(&self.string_bytes)?;
writer.write_all(&string_bytes)?;

Ok(())
}

fn set_pdb_id(&mut self, id: [u8; 20]) {
fn set_pdb_id(&mut self, id: DebugId) {
self.pdb_id = id;
}

fn insert_file(&mut self, name: &str, lang: Language) -> u32 {
let name_offset = self.insert_string(name);
let name_offset = self.string_table.insert(name) as u32;
let file = raw::File {
name_offset,
lang: lang as u32,
};

self.files.insert_full(file).0 as u32
}

/// Insert a string into this converter.
///
/// If the string was already present, it is not added again. A newly added string
/// is prefixed by its length in LEB128 encoding. The returned `u32`
/// is the offset into the `string_bytes` field where the string is saved.
fn insert_string(&mut self, s: &str) -> u32 {
let Self {
ref mut strings,
ref mut string_bytes,
..
} = self;
if s.is_empty() {
return u32::MAX;
}
if let Some(&offset) = strings.get(s) {
return offset;
}
let string_offset = string_bytes.len() as u32;
let string_len = s.len() as u64;
leb128::write::unsigned(string_bytes, string_len).unwrap();
string_bytes.extend(s.bytes());

strings.insert(s.to_owned(), string_offset);
string_offset
}
}

struct WriteWrapper<W> {
writer: W,
position: usize,
}

impl<W: Write> WriteWrapper<W> {
fn new(writer: W) -> Self {
Self {
writer,
position: 0,
}
}

fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
let len = data.len();
self.writer.write_all(data)?;
self.position += len;
Ok(len)
}

fn align(&mut self) -> std::io::Result<usize> {
let buf = &[0u8; 7];
let len = {
let to_align = self.position;
let remainder = to_align % 8;
if remainder == 0 {
remainder
} else {
8 - remainder
}
};
self.write(&buf[0..len])
}
}
Loading