diff --git a/CHANGELOG.md b/CHANGELOG.md index a14b2c43c..3c6331464 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,15 @@ # Changelog +## Unreleased + +**Features**: + +- The new version 8 of the symcache format saves strings slightly more compactly. Reading of version 7 is still supported. ([#670](https://github.com/getsentry/symbolic/pull/670)) + +**Internal**: + +- Use the `watto` library for de/serialization of symcache and ppdbcache. ([#670](https://github.com/getsentry/symbolic/pull/670)) + ## 9.1.4 **Fixes**: diff --git a/symbolic-cabi/src/core.rs b/symbolic-cabi/src/core.rs index a4cada6e2..9664ca058 100644 --- a/symbolic-cabi/src/core.rs +++ b/symbolic-cabi/src/core.rs @@ -255,7 +255,9 @@ impl SymbolicErrorCode { if let Some(error) = error.downcast_ref::() { return match error.kind() { ErrorKind::WrongFormat => SymbolicErrorCode::SymCacheErrorBadFileMagic, - ErrorKind::HeaderTooSmall => SymbolicErrorCode::SymCacheErrorBadFileHeader, + ErrorKind::HeaderTooSmall | ErrorKind::InvalidHeader => { + SymbolicErrorCode::SymCacheErrorBadFileHeader + } ErrorKind::WrongVersion => SymbolicErrorCode::SymCacheErrorUnsupportedVersion, ErrorKind::BadDebugFile => SymbolicErrorCode::SymCacheErrorBadDebugFile, _ => SymbolicErrorCode::SymCacheErrorUnknown, diff --git a/symbolic-ppdb/Cargo.toml b/symbolic-ppdb/Cargo.toml index 2dc86ca05..397ad00aa 100644 --- a/symbolic-ppdb/Cargo.toml +++ b/symbolic-ppdb/Cargo.toml @@ -21,8 +21,7 @@ all-features = true [dependencies] indexmap = "1.9.1" -leb128 = "0.2.5" symbolic-common = { version = "9.1.4", path = "../symbolic-common" } +watto = { version = "0.1.0", features = ["writer", "strings"] } thiserror = "1.0.31" uuid = "1.0.0" -zerocopy = "0.6.1" diff --git a/symbolic-ppdb/src/cache/lookup.rs b/symbolic-ppdb/src/cache/lookup.rs index 7cf6bb142..dbac957be 100644 --- a/symbolic-ppdb/src/cache/lookup.rs +++ b/symbolic-ppdb/src/cache/lookup.rs @@ -62,11 +62,6 @@ impl<'data> PortablePdbCache<'data> { /// Resolves a string reference to the pointed-to `&str` data. fn get_string(&self, offset: u32) -> Option<&'data str> { - let reader = &mut self.string_bytes.get(offset as usize..)?; - let len = leb128::read::unsigned(reader).ok()? as usize; - - let bytes = reader.get(..len)?; - - std::str::from_utf8(bytes).ok() + watto::StringTable::read(self.string_bytes, offset as usize).ok() } } diff --git a/symbolic-ppdb/src/cache/mod.rs b/symbolic-ppdb/src/cache/mod.rs index 2d3cde1b1..29e69a6d3 100644 --- a/symbolic-ppdb/src/cache/mod.rs +++ b/symbolic-ppdb/src/cache/mod.rs @@ -53,7 +53,7 @@ pub(crate) mod writer; use symbolic_common::DebugId; use thiserror::Error; -use zerocopy::LayoutVerified; +use watto::{align_to, Pod}; const PPDBCACHE_VERSION: u32 = 1; @@ -148,10 +148,8 @@ pub struct PortablePdbCache<'data> { impl<'data> PortablePdbCache<'data> { /// Parses the given buffer into a `PortablePdbCache`. 
pub fn parse(buf: &'data [u8]) -> Result { - let (lv, rest) = LayoutVerified::<_, raw::Header>::new_from_prefix(buf) - .ok_or(CacheErrorKind::InvalidHeader)?; - - let header = lv.into_ref(); + let (header, rest) = + raw::Header::ref_from_prefix(buf).ok_or(CacheErrorKind::InvalidHeader)?; if header.magic == raw::PPDBCACHE_MAGIC_FLIPPED { return Err(CacheErrorKind::WrongEndianness.into()); @@ -164,34 +162,26 @@ impl<'data> PortablePdbCache<'data> { return Err(CacheErrorKind::WrongVersion(header.version).into()); } - let rest = align_buf(rest); + let (_, rest) = align_to(rest, 8).ok_or(CacheErrorKind::InvalidFiles)?; - let (lv, rest) = LayoutVerified::<_, [raw::File]>::new_slice_from_prefix( - rest, - header.num_files as usize, - ) - .ok_or(CacheErrorKind::InvalidFiles)?; + let (files, rest) = raw::File::slice_from_prefix(rest, header.num_files as usize) + .ok_or(CacheErrorKind::InvalidFiles)?; - let files = lv.into_slice(); - let rest = align_buf(rest); + let (_, rest) = align_to(rest, 8).ok_or(CacheErrorKind::InvalidSourceLocations)?; - let (lv, rest) = LayoutVerified::<_, [raw::SourceLocation]>::new_slice_from_prefix( - rest, - header.num_ranges as usize, - ) - .ok_or(CacheErrorKind::InvalidSourceLocations)?; + let (source_locations, rest) = + raw::SourceLocation::slice_from_prefix(rest, header.num_ranges as usize) + .ok_or(CacheErrorKind::InvalidSourceLocations)?; - let source_locations = lv.into_slice(); - let rest = align_buf(rest); + let (_, rest) = align_to(rest, 8).ok_or(CacheErrorKind::InvalidRanges)?; - let (lv, rest) = LayoutVerified::<_, [raw::Range]>::new_slice_from_prefix( - rest, - header.num_ranges as usize, - ) - .ok_or(CacheErrorKind::InvalidRanges)?; + let (ranges, rest) = raw::Range::slice_from_prefix(rest, header.num_ranges as usize) + .ok_or(CacheErrorKind::InvalidRanges)?; - let ranges = lv.into_slice(); - let rest = align_buf(rest); + let (_, rest) = align_to(rest, 8).ok_or(CacheErrorKind::UnexpectedStringBytes { + expected: header.string_bytes as usize, + found: 0, + })?; if rest.len() < header.string_bytes as usize { return Err(CacheErrorKind::UnexpectedStringBytes { @@ -212,9 +202,7 @@ impl<'data> PortablePdbCache<'data> { /// Returns the [`DebugId`] of this portable PDB cache. pub fn debug_id(&self) -> DebugId { - let (guid, age) = self.header.pdb_id.split_at(16); - let age = u32::from_ne_bytes(age.try_into().unwrap()); - DebugId::from_guid_age(guid, age).unwrap() + self.header.pdb_id } } @@ -229,8 +217,3 @@ impl<'data> std::fmt::Debug for PortablePdbCache<'data> { .finish() } } - -fn align_buf(buf: &[u8]) -> &[u8] { - let offset = buf.as_ptr().align_offset(8); - buf.get(offset..).unwrap_or(&[]) -} diff --git a/symbolic-ppdb/src/cache/raw.rs b/symbolic-ppdb/src/cache/raw.rs index 5a99e3e1f..2fc9a8910 100644 --- a/symbolic-ppdb/src/cache/raw.rs +++ b/symbolic-ppdb/src/cache/raw.rs @@ -1,4 +1,5 @@ -use zerocopy::{AsBytes, FromBytes}; +use symbolic_common::DebugId; +use watto::Pod; /// The magic file preamble as individual bytes. const PPDBCACHE_MAGIC_BYTES: [u8; 4] = *b"PDBC"; @@ -11,7 +12,7 @@ pub(crate) const PPDBCACHE_MAGIC: u32 = u32::from_le_bytes(PPDBCACHE_MAGIC_BYTES pub(crate) const PPDBCACHE_MAGIC_FLIPPED: u32 = PPDBCACHE_MAGIC.swap_bytes(); /// The header of a PortablePdbCache file. -#[derive(Debug, Clone, FromBytes, AsBytes)] +#[derive(Debug, Clone)] #[repr(C)] pub(crate) struct Header { /// The file magic representing the file format and endianness. @@ -19,7 +20,7 @@ pub(crate) struct Header { /// The PortablePdbCache format version. 
pub(crate) version: u32, /// A byte sequence uniquely representing the debugging metadata blob content. - pub(crate) pdb_id: [u8; 20], + pub(crate) pdb_id: DebugId, /// The number of files contained in the cache file. pub(crate) num_files: u32, /// The number of ranges/source locations contained in the cache file. @@ -32,7 +33,7 @@ pub(crate) struct Header { } /// A location in a source file, comprising a line and the index of a file. -#[derive(Debug, Clone, Copy, FromBytes, AsBytes)] +#[derive(Debug, Clone, Copy)] #[repr(C)] pub(crate) struct SourceLocation { pub(crate) line: u32, @@ -43,7 +44,7 @@ pub(crate) struct SourceLocation { /// /// Only the starting IL offset is saved; the ending offset is given implicitly by /// the starting offset of the next range (if any). -#[derive(Debug, Clone, Copy, FromBytes, AsBytes, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(C)] pub(crate) struct Range { pub(crate) func_idx: u32, @@ -51,7 +52,7 @@ pub(crate) struct Range { } /// Serialized file in the cache. -#[derive(Debug, Clone, Copy, FromBytes, AsBytes, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(C)] pub(crate) struct File { /// The file path (reference to a [`String`]). @@ -59,3 +60,30 @@ pub(crate) struct File { /// The file's source language. pub(crate) lang: u32, } + +unsafe impl Pod for Header {} +unsafe impl Pod for SourceLocation {} +unsafe impl Pod for Range {} +unsafe impl Pod for File {} + +#[cfg(test)] +mod tests { + use std::mem; + + use super::*; + + #[test] + fn test_sizeof() { + assert_eq!(mem::size_of::
<Header>(), 68); + assert_eq!(mem::align_of::<Header>
(), 4); + + assert_eq!(mem::size_of::(), 8); + assert_eq!(mem::align_of::(), 4); + + assert_eq!(mem::size_of::(), 8); + assert_eq!(mem::align_of::(), 4); + + assert_eq!(mem::size_of::(), 8); + assert_eq!(mem::align_of::(), 4); + } +} diff --git a/symbolic-ppdb/src/cache/writer.rs b/symbolic-ppdb/src/cache/writer.rs index 1391b471e..228f94583 100644 --- a/symbolic-ppdb/src/cache/writer.rs +++ b/symbolic-ppdb/src/cache/writer.rs @@ -1,9 +1,9 @@ -use std::collections::{BTreeMap, HashMap}; +use std::collections::BTreeMap; use std::io::Write; use indexmap::IndexSet; -use symbolic_common::Language; -use zerocopy::AsBytes; +use symbolic_common::{DebugId, Language}; +use watto::{Pod, StringTable}; use super::{raw, CacheError}; use crate::PortablePdb; @@ -15,18 +15,15 @@ use crate::PortablePdb; #[derive(Debug, Default)] pub struct PortablePdbCacheConverter { /// A byte sequence uniquely representing the debugging metadata blob content. - pdb_id: [u8; 20], + pdb_id: DebugId, /// The set of all [`raw::File`]s that have been added to this `Converter`. files: IndexSet, - /// The concatenation of all strings that have been added to this `Converter`. - string_bytes: Vec, - /// A map from [`String`]s that have been added to this `Converter` to their offsets in the `string_bytes` field. - strings: HashMap, /// A map from [`raw::Range`]s to the [`raw::SourceLocation`]s they correspond to. /// /// Only the starting address of a range is saved, the end address is given implicitly /// by the start address of the next range. ranges: BTreeMap, + string_table: StringTable, } impl PortablePdbCacheConverter { @@ -69,11 +66,11 @@ impl PortablePdbCacheConverter { /// /// This writes the PortablePdbCache binary format into the given [`Write`]. pub fn serialize(self, writer: &mut W) -> std::io::Result<()> { - let mut writer = WriteWrapper::new(writer); + let mut writer = watto::Writer::new(writer); let num_ranges = self.ranges.len() as u32; let num_files = self.files.len() as u32; - let string_bytes = self.string_bytes.len() as u32; + let string_bytes = self.string_table.into_bytes(); let header = raw::Header { magic: raw::PPDBCACHE_MAGIC, @@ -83,39 +80,39 @@ impl PortablePdbCacheConverter { num_files, num_ranges, - string_bytes, + string_bytes: string_bytes.len() as u32, _reserved: [0; 16], }; - writer.write(header.as_bytes())?; - writer.align()?; + writer.write_all(header.as_bytes())?; + writer.align_to(8)?; for file in self.files.into_iter() { - writer.write(file.as_bytes())?; + writer.write_all(file.as_bytes())?; } - writer.align()?; + writer.align_to(8)?; for sl in self.ranges.values() { - writer.write(sl.as_bytes())?; + writer.write_all(sl.as_bytes())?; } - writer.align()?; + writer.align_to(8)?; for r in self.ranges.keys() { - writer.write(r.as_bytes())?; + writer.write_all(r.as_bytes())?; } - writer.align()?; + writer.align_to(8)?; - writer.write(&self.string_bytes)?; + writer.write_all(&string_bytes)?; Ok(()) } - fn set_pdb_id(&mut self, id: [u8; 20]) { + fn set_pdb_id(&mut self, id: DebugId) { self.pdb_id = id; } fn insert_file(&mut self, name: &str, lang: Language) -> u32 { - let name_offset = self.insert_string(name); + let name_offset = self.string_table.insert(name) as u32; let file = raw::File { name_offset, lang: lang as u32, @@ -123,65 +120,4 @@ impl PortablePdbCacheConverter { self.files.insert_full(file).0 as u32 } - - /// Insert a string into this converter. - /// - /// If the string was already present, it is not added again. A newly added string - /// is prefixed by its length in LEB128 encoding. 
The returned `u32` - /// is the offset into the `string_bytes` field where the string is saved. - fn insert_string(&mut self, s: &str) -> u32 { - let Self { - ref mut strings, - ref mut string_bytes, - .. - } = self; - if s.is_empty() { - return u32::MAX; - } - if let Some(&offset) = strings.get(s) { - return offset; - } - let string_offset = string_bytes.len() as u32; - let string_len = s.len() as u64; - leb128::write::unsigned(string_bytes, string_len).unwrap(); - string_bytes.extend(s.bytes()); - - strings.insert(s.to_owned(), string_offset); - string_offset - } -} - -struct WriteWrapper { - writer: W, - position: usize, -} - -impl WriteWrapper { - fn new(writer: W) -> Self { - Self { - writer, - position: 0, - } - } - - fn write(&mut self, data: &[u8]) -> std::io::Result { - let len = data.len(); - self.writer.write_all(data)?; - self.position += len; - Ok(len) - } - - fn align(&mut self) -> std::io::Result { - let buf = &[0u8; 7]; - let len = { - let to_align = self.position; - let remainder = to_align % 8; - if remainder == 0 { - remainder - } else { - 8 - remainder - } - }; - self.write(&buf[0..len]) - } } diff --git a/symbolic-ppdb/src/format/metadata.rs b/symbolic-ppdb/src/format/metadata.rs index 7ab3b9393..f81d550fb 100644 --- a/symbolic-ppdb/src/format/metadata.rs +++ b/symbolic-ppdb/src/format/metadata.rs @@ -2,7 +2,7 @@ use std::convert::TryInto; use std::fmt; use std::ops::{Index, IndexMut}; -use zerocopy::LayoutVerified; +use watto::Pod; use super::{FormatError, FormatErrorKind}; @@ -250,10 +250,8 @@ pub struct MetadataStream<'data> { impl<'data> MetadataStream<'data> { pub fn parse(buf: &'data [u8], referenced_table_sizes: [u32; 64]) -> Result { - let (lv, mut rest) = - LayoutVerified::<_, super::raw::MetadataStreamHeader>::new_from_prefix(buf) - .ok_or(FormatErrorKind::InvalidHeader)?; - let header = lv.into_ref(); + let (header, mut rest) = super::raw::MetadataStreamHeader::ref_from_prefix(buf) + .ok_or(FormatErrorKind::InvalidHeader)?; // TODO: verify major/minor version // TODO: verify reserved @@ -264,12 +262,10 @@ impl<'data> MetadataStream<'data> { continue; } - let (lv, rest_) = LayoutVerified::<_, u32>::new_from_prefix(rest) - .ok_or(FormatErrorKind::InvalidLength)?; - let len = lv.read(); + let (len, rest_) = u32::ref_from_prefix(rest).ok_or(FormatErrorKind::InvalidLength)?; rest = rest_; - table.rows = len as usize; + table.rows = *len as usize; } let table_contents = rest; diff --git a/symbolic-ppdb/src/format/mod.rs b/symbolic-ppdb/src/format/mod.rs index f5e885262..316212ddc 100644 --- a/symbolic-ppdb/src/format/mod.rs +++ b/symbolic-ppdb/src/format/mod.rs @@ -7,9 +7,9 @@ mod utils; use std::fmt; use thiserror::Error; -use zerocopy::LayoutVerified; +use watto::Pod; -use symbolic_common::Uuid; +use symbolic_common::{DebugId, Uuid}; use metadata::{MetadataStream, TableType}; use streams::{BlobStream, GuidStream, PdbStream, StringStream, UsStream}; @@ -172,9 +172,8 @@ impl fmt::Debug for PortablePdb<'_> { impl<'data> PortablePdb<'data> { /// Parses the provided buffer into a Portable PDB file. 
pub fn parse(buf: &'data [u8]) -> Result { - let (lv, rest) = LayoutVerified::<_, raw::Header>::new_from_prefix(buf) - .ok_or(FormatErrorKind::InvalidHeader)?; - let header = lv.into_ref(); + let (header, rest) = + raw::Header::ref_from_prefix(buf).ok_or(FormatErrorKind::InvalidHeader)?; if header.signature != raw::METADATA_SIGNATURE { return Err(FormatErrorKind::InvalidSignature.into()); @@ -195,10 +194,8 @@ impl<'data> PortablePdb<'data> { // We already know that buf is long enough. let streams_buf = &rest[version_length..]; - let (lv, mut streams_buf) = - LayoutVerified::<_, raw::HeaderPart2>::new_from_prefix(streams_buf) - .ok_or(FormatErrorKind::InvalidHeader)?; - let header2 = lv.into_ref(); + let (header2, mut streams_buf) = + raw::HeaderPart2::ref_from_prefix(streams_buf).ok_or(FormatErrorKind::InvalidHeader)?; // TODO: validate flags @@ -217,10 +214,8 @@ impl<'data> PortablePdb<'data> { }; for _ in 0..stream_count { - let (lv, after_header_buf) = - LayoutVerified::<_, raw::StreamHeader>::new_from_prefix(streams_buf) - .ok_or(FormatErrorKind::InvalidStreamHeader)?; - let header = lv.into_ref(); + let (header, after_header_buf) = raw::StreamHeader::ref_from_prefix(streams_buf) + .ok_or(FormatErrorKind::InvalidStreamHeader)?; let name_buf = after_header_buf.get(..32).unwrap_or(after_header_buf); let name_buf = name_buf @@ -295,7 +290,7 @@ impl<'data> PortablePdb<'data> { } /// Reads this file's PDB ID from its #PDB stream. - pub(crate) fn pdb_id(&self) -> Option<[u8; 20]> { + pub(crate) fn pdb_id(&self) -> Option { self.pdb_stream.as_ref().map(|stream| stream.id()) } diff --git a/symbolic-ppdb/src/format/raw.rs b/symbolic-ppdb/src/format/raw.rs index 532c19476..c65035516 100644 --- a/symbolic-ppdb/src/format/raw.rs +++ b/symbolic-ppdb/src/format/raw.rs @@ -1,4 +1,4 @@ -use zerocopy::FromBytes; +use watto::Pod; /// Signature for physical metadata as specified by ECMA-335. pub const METADATA_SIGNATURE: u32 = 0x424A_5342; @@ -7,7 +7,7 @@ pub const METADATA_SIGNATURE: u32 = 0x424A_5342; /// /// This includes everything before the version string. #[repr(C)] -#[derive(Debug, FromBytes)] +#[derive(Debug)] pub struct Header { /// The metadata signature. /// @@ -30,7 +30,7 @@ pub struct Header { /// /// This includes everything after the version string. #[repr(C)] -#[derive(Debug, FromBytes)] +#[derive(Debug)] pub struct HeaderPart2 { /// Reserved, always 0. pub flags: u16, @@ -42,7 +42,7 @@ pub struct HeaderPart2 { /// /// Does not contain the stream's name due to its variable length. #[repr(C)] -#[derive(Debug, FromBytes)] +#[derive(Debug)] pub struct StreamHeader { /// Memory offset to start of this stream form start of the metadata root. pub offset: u32, @@ -53,7 +53,7 @@ pub struct StreamHeader { } #[repr(C, packed(4))] -#[derive(Debug, FromBytes, Clone, Copy)] +#[derive(Debug, Clone, Copy)] pub struct PdbStreamHeader { pub id: [u8; 20], pub entry_point: u32, @@ -61,7 +61,7 @@ pub struct PdbStreamHeader { } #[repr(C, packed(4))] -#[derive(Debug, FromBytes, Clone, Copy)] +#[derive(Debug, Clone, Copy)] pub struct MetadataStreamHeader { pub _reserved: u32, pub major_version: u8, @@ -71,3 +71,34 @@ pub struct MetadataStreamHeader { pub valid_tables: u64, pub sorted_tables: u64, } + +unsafe impl Pod for Header {} +unsafe impl Pod for HeaderPart2 {} +unsafe impl Pod for StreamHeader {} +unsafe impl Pod for PdbStreamHeader {} +unsafe impl Pod for MetadataStreamHeader {} + +#[cfg(test)] +mod tests { + use std::mem; + + use super::*; + + #[test] + fn test_sizeof() { + assert_eq!(mem::size_of::
<Header>(), 16); + assert_eq!(mem::align_of::<Header>
(), 4); + + assert_eq!(mem::size_of::(), 4); + assert_eq!(mem::align_of::(), 2); + + assert_eq!(mem::size_of::(), 8); + assert_eq!(mem::align_of::(), 4); + + assert_eq!(mem::size_of::(), 32); + assert_eq!(mem::align_of::(), 4); + + assert_eq!(mem::size_of::(), 24); + assert_eq!(mem::align_of::(), 4); + } +} diff --git a/symbolic-ppdb/src/format/streams.rs b/symbolic-ppdb/src/format/streams.rs index 2e0482b31..5bd8c507e 100644 --- a/symbolic-ppdb/src/format/streams.rs +++ b/symbolic-ppdb/src/format/streams.rs @@ -1,5 +1,5 @@ -use symbolic_common::Uuid; -use zerocopy::LayoutVerified; +use symbolic_common::{DebugId, Uuid}; +use watto::Pod; use super::raw::PdbStreamHeader; use super::utils::decode_unsigned; @@ -43,9 +43,8 @@ pub(crate) struct PdbStream<'data> { impl<'data> PdbStream<'data> { pub(crate) fn parse(buf: &'data [u8]) -> Result { - let (lv, mut rest) = LayoutVerified::<_, PdbStreamHeader>::new_from_prefix(buf) - .ok_or(FormatErrorKind::InvalidHeader)?; - let header = lv.into_ref(); + let (header, mut rest) = + PdbStreamHeader::ref_from_prefix(buf).ok_or(FormatErrorKind::InvalidHeader)?; let mut referenced_table_sizes = [0; 64]; for (i, table) in referenced_table_sizes.iter_mut().enumerate() { @@ -53,12 +52,10 @@ impl<'data> PdbStream<'data> { continue; } - let (lv, rest_) = LayoutVerified::<_, u32>::new_from_prefix(rest) - .ok_or(FormatErrorKind::InvalidLength)?; - let len = lv.read(); + let (len, rest_) = u32::ref_from_prefix(rest).ok_or(FormatErrorKind::InvalidLength)?; rest = rest_; - *table = len as u32; + *table = *len as u32; } Ok(Self { header, @@ -66,8 +63,11 @@ impl<'data> PdbStream<'data> { }) } - pub(crate) fn id(&self) -> [u8; 20] { - self.header.id + pub(crate) fn id(&self) -> DebugId { + let raw_id = self.header.id; + let (guid, age) = raw_id.split_at(16); + let age = u32::from_ne_bytes(age.try_into().unwrap()); + DebugId::from_guid_age(guid, age).unwrap() } } @@ -117,12 +117,9 @@ pub(crate) struct GuidStream<'data> { impl<'data> GuidStream<'data> { pub(crate) fn parse(buf: &'data [u8]) -> Result { - let bytes = LayoutVerified::<_, [uuid::Bytes]>::new_slice(buf) - .ok_or(FormatErrorKind::InvalidLength)?; + let bytes = uuid::Bytes::slice_from_bytes(buf).ok_or(FormatErrorKind::InvalidLength)?; - Ok(Self { - buf: bytes.into_slice(), - }) + Ok(Self { buf: bytes }) } pub(crate) fn get_guid(&self, idx: u32) -> Option { diff --git a/symbolic-symcache/Cargo.toml b/symbolic-symcache/Cargo.toml index ac697311c..c92d0b0fd 100644 --- a/symbolic-symcache/Cargo.toml +++ b/symbolic-symcache/Cargo.toml @@ -29,6 +29,7 @@ symbolic-il2cpp = { version = "9.1.4", path = "../symbolic-il2cpp", optional = t thiserror = "1.0.20" indexmap = "1.7.0" tracing = "0.1.35" +watto = { version = "0.1.0", features = ["writer", "strings"] } [dev-dependencies] insta = { version = "1.18.0", features = ["yaml"] } diff --git a/symbolic-symcache/src/error.rs b/symbolic-symcache/src/error.rs index a0dd81359..b671a39fa 100644 --- a/symbolic-symcache/src/error.rs +++ b/symbolic-symcache/src/error.rs @@ -9,9 +9,13 @@ use thiserror::Error; #[non_exhaustive] pub enum ErrorKind { /// The buffer is not correctly aligned. + /// + /// This variant is currently unused. #[error("source buffer is not correctly aligned")] BufferNotAligned, /// The header's size doesn't match our expected size. + /// + /// This variant is currently unused. #[error("header is too small")] HeaderTooSmall, /// The file was generated by a system with different endianness. 
@@ -24,11 +28,36 @@ pub enum ErrorKind { #[error("unknown SymCache version")] WrongVersion, /// The self-advertised size of the buffer is not correct. + /// + /// This variant is currently unused. #[error("incorrect buffer length")] BadFormatLength, /// The debug file could not be converted to a symcache. #[error("bad debug file")] BadDebugFile, + /// Header could not be parsed from the cache file. + #[error("could not read header")] + InvalidHeader, + /// File data could not be parsed from the cache file. + #[error("could not read files")] + InvalidFiles, + /// Function data could not be parsed from the cache file. + #[error("could not read functions")] + InvalidFunctions, + /// Source location data could not be parsed from the cache file. + #[error("could not read source locations")] + InvalidSourceLocations, + /// Range data could not be parsed from the cache file. + #[error("could not read ranges")] + InvalidRanges, + /// The header claimed an incorrect number of string bytes. + #[error("expected {expected} string bytes, found {found}")] + UnexpectedStringBytes { + /// Expected number of string bytes. + expected: usize, + /// Number of string bytes actually found in the cache file. + found: usize, + }, } /// An error returned when handling a [`SymCache`](crate::SymCache). diff --git a/symbolic-symcache/src/lib.rs b/symbolic-symcache/src/lib.rs index cc491dc4a..1155ca6cb 100644 --- a/symbolic-symcache/src/lib.rs +++ b/symbolic-symcache/src/lib.rs @@ -108,16 +108,15 @@ pub mod transform; mod writer; use std::convert::TryInto; -use std::mem; -use std::ptr; use symbolic_common::Arch; use symbolic_common::AsSelf; use symbolic_common::DebugId; +use watto::StringTable; +use watto::{align_to, Pod}; pub use error::{Error, ErrorKind}; pub use lookup::*; -use raw::align_to_eight; pub use writer::SymCacheConverter; type Result = std::result::Result; @@ -133,7 +132,8 @@ type Result = std::result::Result; /// 5: PR #221: Invalid inlinee nesting leading to wrong stack traces /// 6: PR #319: Correct line offsets and spacer line records /// 7: PR #459: A new binary format fundamentally based on addr ranges -pub const SYMCACHE_VERSION: u32 = 7; +/// 8: PR #670: Use LEB128-prefixed string table +pub const SYMCACHE_VERSION: u32 = 8; /// The serialized SymCache binary format. /// @@ -168,116 +168,82 @@ impl<'data> SymCache<'data> { /// Parse the SymCache binary format into a convenient type that allows safe access and /// fast lookups. pub fn parse(buf: &'data [u8]) -> Result { - if align_to_eight(buf.as_ptr() as usize) != 0 { - return Err(ErrorKind::BufferNotAligned.into()); - } - - let mut header_size = mem::size_of::(); - header_size += align_to_eight(header_size); - - if buf.len() < header_size { - return Err(ErrorKind::HeaderTooSmall.into()); - } - // SAFETY: we checked that the buffer is well aligned and large enough to fit a `raw::Header`. 
- let header = unsafe { &*(buf.as_ptr() as *const raw::Header) }; + let (header, rest) = raw::Header::ref_from_prefix(buf).ok_or(ErrorKind::InvalidHeader)?; if header.magic == raw::SYMCACHE_MAGIC_FLIPPED { return Err(ErrorKind::WrongEndianness.into()); } if header.magic != raw::SYMCACHE_MAGIC { return Err(ErrorKind::WrongFormat.into()); } - if header.version != SYMCACHE_VERSION { + if header.version != SYMCACHE_VERSION && header.version != 7 { return Err(ErrorKind::WrongVersion.into()); } - let mut files_size = mem::size_of::() * header.num_files as usize; - files_size += align_to_eight(files_size); - - let mut functions_size = mem::size_of::() * header.num_functions as usize; - functions_size += align_to_eight(functions_size); + let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidFiles)?; + let (files, rest) = raw::File::slice_from_prefix(rest, header.num_files as usize) + .ok_or(ErrorKind::InvalidFiles)?; - let mut source_locations_size = - mem::size_of::() * header.num_source_locations as usize; - source_locations_size += align_to_eight(source_locations_size); + let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidFunctions)?; + let (functions, rest) = + raw::Function::slice_from_prefix(rest, header.num_functions as usize) + .ok_or(ErrorKind::InvalidFunctions)?; - let mut ranges_size = mem::size_of::() * header.num_ranges as usize; - ranges_size += align_to_eight(ranges_size); + let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidSourceLocations)?; + let (source_locations, rest) = + raw::SourceLocation::slice_from_prefix(rest, header.num_source_locations as usize) + .ok_or(ErrorKind::InvalidSourceLocations)?; - let expected_buf_size = header_size - + files_size - + functions_size - + source_locations_size - + ranges_size - + header.string_bytes as usize; + let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::InvalidRanges)?; + let (ranges, rest) = raw::Range::slice_from_prefix(rest, header.num_ranges as usize) + .ok_or(ErrorKind::InvalidRanges)?; - if buf.len() < expected_buf_size || source_locations_size < ranges_size { - return Err(ErrorKind::BadFormatLength.into()); + let (_, rest) = align_to(rest, 8).ok_or(ErrorKind::UnexpectedStringBytes { + expected: header.string_bytes as usize, + found: 0, + })?; + if rest.len() < header.string_bytes as usize { + return Err(ErrorKind::UnexpectedStringBytes { + expected: header.string_bytes as usize, + found: rest.len(), + } + .into()); } - // SAFETY: we just made sure that all the pointers we are constructing via pointer - // arithmetic are within `buf` - let files_start = unsafe { buf.as_ptr().add(header_size) }; - let functions_start = unsafe { files_start.add(files_size) }; - let source_locations_start = unsafe { functions_start.add(functions_size) }; - let ranges_start = unsafe { source_locations_start.add(source_locations_size) }; - let string_bytes_start = unsafe { ranges_start.add(ranges_size) }; - - // SAFETY: the above buffer size check also made sure we are not going out of bounds - // here - let files = unsafe { - &*ptr::slice_from_raw_parts(files_start as *const raw::File, header.num_files as usize) - }; - let functions = unsafe { - &*ptr::slice_from_raw_parts( - functions_start as *const raw::Function, - header.num_functions as usize, - ) - }; - let source_locations = unsafe { - &*ptr::slice_from_raw_parts( - source_locations_start as *const raw::SourceLocation, - header.num_source_locations as usize, - ) - }; - let ranges = unsafe { - &*ptr::slice_from_raw_parts( - ranges_start as *const raw::Range, - 
header.num_ranges as usize, - ) - }; - let string_bytes = unsafe { - &*ptr::slice_from_raw_parts(string_bytes_start, header.string_bytes as usize) - }; - Ok(SymCache { header, files, functions, source_locations, ranges, - string_bytes, + string_bytes: rest, }) } /// Resolves a string reference to the pointed-to `&str` data. fn get_string(&self, offset: u32) -> Option<&'data str> { - if offset == u32::MAX { - return None; - } - let len_offset = offset as usize; - let len_size = std::mem::size_of::(); - let len = u32::from_ne_bytes( - self.string_bytes - .get(len_offset..len_offset + len_size)? - .try_into() - .unwrap(), - ) as usize; + if self.header.version >= 8 { + // version >= 8: string length prefixes are LEB128 + StringTable::read(self.string_bytes, offset as usize).ok() + } else { + // version < 8: string length prefixes are u32 + if offset == u32::MAX { + return None; + } + let len_offset = offset as usize; + let len_size = std::mem::size_of::(); + let len = u32::from_ne_bytes( + self.string_bytes + .get(len_offset..len_offset + len_size)? + .try_into() + .unwrap(), + ) as usize; - let start_offset = len_offset + len_size; - let end_offset = start_offset + len; - let bytes = self.string_bytes.get(start_offset..end_offset)?; + let start_offset = len_offset + len_size; + let end_offset = start_offset + len; + let bytes = self.string_bytes.get(start_offset..end_offset)?; - std::str::from_utf8(bytes).ok() + std::str::from_utf8(bytes).ok() + } } /// The version of the SymCache file format. diff --git a/symbolic-symcache/src/raw.rs b/symbolic-symcache/src/raw.rs index a8657a9e7..1d29cd87c 100644 --- a/symbolic-symcache/src/raw.rs +++ b/symbolic-symcache/src/raw.rs @@ -1,5 +1,8 @@ //! The raw SymCache binary file format internals. //! + +use watto::Pod; + use symbolic_common::{Arch, DebugId}; /// The magic file preamble as individual bytes. @@ -110,16 +113,11 @@ pub(crate) struct SourceLocation { #[repr(C)] pub(crate) struct Range(pub(crate) u32); -/// Returns the amount left to add to the remainder to get 8 if -/// `to_align` isn't a multiple of 8. -pub(crate) fn align_to_eight(to_align: usize) -> usize { - let remainder = to_align % 8; - if remainder == 0 { - remainder - } else { - 8 - remainder - } -} +unsafe impl Pod for Header {} +unsafe impl Pod for Function {} +unsafe impl Pod for File {} +unsafe impl Pod for SourceLocation {} +unsafe impl Pod for Range {} #[cfg(test)] mod tests { diff --git a/symbolic-symcache/src/writer.rs b/symbolic-symcache/src/writer.rs index 1fd311c96..e8cf08bba 100644 --- a/symbolic-symcache/src/writer.rs +++ b/symbolic-symcache/src/writer.rs @@ -1,12 +1,13 @@ //! Defines the [SymCache Converter](`SymCacheConverter`). use std::collections::btree_map; -use std::collections::{BTreeMap, HashMap}; +use std::collections::BTreeMap; use std::io::Write; use indexmap::IndexSet; use symbolic_common::{Arch, DebugId}; use symbolic_debuginfo::{DebugSession, FileFormat, Function, ObjectLike, Symbol}; +use watto::{Pod, StringTable, Writer}; use super::{raw, transform}; use crate::{Error, ErrorKind}; @@ -29,10 +30,7 @@ pub struct SymCacheConverter<'a> { /// A list of transformers that are used to transform each function / source location. transformers: transform::Transformers<'a>, - /// The concatenation of all strings that have been added to this `Converter`. - string_bytes: Vec, - /// A map from [`String`]s that have been added to this `Converter` to their offsets in the `string_bytes` field. 
- strings: HashMap, + string_table: StringTable, /// The set of all [`raw::File`]s that have been added to this `Converter`. files: IndexSet, /// The set of all [`raw::Function`]s that have been added to this `Converter`. @@ -80,35 +78,6 @@ impl<'a> SymCacheConverter<'a> { self.debug_id = debug_id; } - /// Insert a string into this converter. - /// - /// If the string was already present, it is not added again. A newly added string - /// is prefixed by its length as a `u32`. The returned `u32` - /// is the offset into the `string_bytes` field where the string is saved. - fn insert_string( - string_bytes: &mut Vec, - strings: &mut HashMap, - s: &str, - ) -> u32 { - if s.is_empty() { - return u32::MAX; - } - if let Some(&offset) = strings.get(s) { - return offset; - } - let string_offset = string_bytes.len() as u32; - let string_len = s.len() as u32; - string_bytes.extend(string_len.to_ne_bytes()); - string_bytes.extend(s.bytes()); - // we should have written exactly `string_len + 4` bytes - debug_assert_eq!( - string_bytes.len(), - string_offset as usize + string_len as usize + std::mem::size_of::(), - ); - strings.insert(s.to_owned(), string_offset); - string_offset - } - // Methods processing symbolic-debuginfo [`ObjectLike`] below: // Feel free to move these to a separate file. @@ -157,6 +126,7 @@ impl<'a> SymCacheConverter<'a> { function: &Function<'_>, call_locations: &[(u32, u32)], ) { + let string_table = &mut self.string_table; // skip over empty functions or functions whose address is too large to fit in a u32 if function.size == 0 || function.address > u32::MAX as u64 { return; @@ -186,9 +156,7 @@ impl<'a> SymCacheConverter<'a> { &function.name }; - let string_bytes = &mut self.string_bytes; - let strings = &mut self.strings; - let name_offset = Self::insert_string(string_bytes, strings, function_name); + let name_offset = string_table.insert(function_name) as u32; let lang = language as u32; let (fun_idx, _) = self.functions.insert_full(raw::Function { @@ -268,16 +236,15 @@ impl<'a> SymCacheConverter<'a> { location = transformer.transform_source_location(location); } - let string_bytes = &mut self.string_bytes; - let strings = &mut self.strings; - let name_offset = Self::insert_string(string_bytes, strings, &location.file.name); + let name_offset = string_table.insert(&location.file.name) as u32; let directory_offset = location .file .directory - .map_or(u32::MAX, |d| Self::insert_string(string_bytes, strings, &d)); - let comp_dir_offset = location.file.comp_dir.map_or(u32::MAX, |cd| { - Self::insert_string(string_bytes, strings, &cd) - }); + .map_or(u32::MAX, |d| string_table.insert(&d) as u32); + let comp_dir_offset = location + .file + .comp_dir + .map_or(u32::MAX, |cd| string_table.insert(&cd) as u32); let (file_idx, _) = self.files.insert_full(raw::File { name_offset, @@ -427,7 +394,7 @@ impl<'a> SymCacheConverter<'a> { &function.name }; - Self::insert_string(&mut self.string_bytes, &mut self.strings, function_name) + self.string_table.insert(function_name) as u32 }; match self.ranges.entry(symbol.address as u32) { @@ -470,7 +437,7 @@ impl<'a> SymCacheConverter<'a> { /// /// This writes the SymCache binary format into the given [`Write`]. 
pub fn serialize(mut self, writer: &mut W) -> std::io::Result<()> { - let mut writer = WriteWrapper::new(writer); + let mut writer = Writer::new(writer); // Insert a trailing sentinel source location in case we have a definite end addr if let Some(last_addr) = self.last_addr { @@ -491,7 +458,7 @@ impl<'a> SymCacheConverter<'a> { let num_functions = self.functions.len() as u32; let num_source_locations = (self.call_locations.len() + self.ranges.len()) as u32; let num_ranges = self.ranges.len() as u32; - let string_bytes = self.string_bytes.len() as u32; + let string_bytes = self.string_table.into_bytes(); let header = raw::Header { magic: raw::SYMCACHE_MAGIC, @@ -504,37 +471,37 @@ impl<'a> SymCacheConverter<'a> { num_functions, num_source_locations, num_ranges, - string_bytes, + string_bytes: string_bytes.len() as u32, _reserved: [0; 16], }; - writer.write(&[header])?; - writer.align()?; + writer.write_all(header.as_bytes())?; + writer.align_to(8)?; for f in self.files { - writer.write(&[f])?; + writer.write_all(f.as_bytes())?; } - writer.align()?; + writer.align_to(8)?; for f in self.functions { - writer.write(&[f])?; + writer.write_all(f.as_bytes())?; } - writer.align()?; + writer.align_to(8)?; for s in self.call_locations { - writer.write(&[s])?; + writer.write_all(s.as_bytes())?; } for s in self.ranges.values() { - writer.write(std::slice::from_ref(s))?; + writer.write_all(s.as_bytes())?; } - writer.align()?; + writer.align_to(8)?; for r in self.ranges.keys() { - writer.write(&[raw::Range(*r)])?; + writer.write_all(r.as_bytes())?; } - writer.align()?; + writer.align_to(8)?; - writer.write(&self.string_bytes)?; + writer.write_all(&string_bytes)?; Ok(()) } @@ -583,33 +550,3 @@ fn undecorate_win_symbol(name: &str) -> &str { name } - -struct WriteWrapper { - writer: W, - position: usize, -} - -impl WriteWrapper { - fn new(writer: W) -> Self { - Self { - writer, - position: 0, - } - } - - fn write(&mut self, data: &[T]) -> std::io::Result { - let pointer = data.as_ptr() as *const u8; - let len = std::mem::size_of_val(data); - // SAFETY: both pointer and len are derived directly from data/T and are valid. - let buf = unsafe { std::slice::from_raw_parts(pointer, len) }; - self.writer.write_all(buf)?; - self.position += len; - Ok(len) - } - - fn align(&mut self) -> std::io::Result { - let buf = &[0u8; 7]; - let len = raw::align_to_eight(self.position); - self.write(&buf[0..len]) - } -} diff --git a/symbolic-symcache/tests/test_writer.rs b/symbolic-symcache/tests/test_writer.rs index 8d9e3a7d1..56bf64dd2 100644 --- a/symbolic-symcache/tests/test_writer.rs +++ b/symbolic-symcache/tests/test_writer.rs @@ -25,7 +25,7 @@ fn test_write_header_linux() -> Result<(), Error> { let symcache = SymCache::parse(&buffer)?; insta::assert_debug_snapshot!(symcache, @r###" SymCache { - version: 7, + version: 8, debug_id: DebugId { uuid: "c0bcc3f1-9827-fe65-3058-404b2831d9e6", appendix: 0, @@ -35,7 +35,7 @@ fn test_write_header_linux() -> Result<(), Error> { functions: 697, source_locations: 8305, ranges: 6843, - string_bytes: 52180, + string_bytes: 49877, } "###); @@ -69,7 +69,7 @@ fn test_write_header_macos() -> Result<(), Error> { let symcache = SymCache::parse(&buffer)?; insta::assert_debug_snapshot!(symcache, @r###" SymCache { - version: 7, + version: 8, debug_id: DebugId { uuid: "67e9247c-814e-392b-a027-dbde6748fcbf", appendix: 0, @@ -79,7 +79,7 @@ fn test_write_header_macos() -> Result<(), Error> { functions: 639, source_locations: 7204, ranges: 5759, - string_bytes: 42829, + string_bytes: 40958, } "###);
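
For context on the changelog entry above (version 8 "saves strings slightly more compactly"): a minimal, self-contained sketch of the two string-table layouts that `SymCache::get_string` now has to support. The `write_v7`/`write_v8`/`read_v8` helpers are hypothetical and not part of `watto` or symbolic; the layouts themselves (a native-endian `u32` length prefix in version 7, an unsigned LEB128 length prefix in version 8) are the ones the code above reads and writes.

```rust
/// Illustrative only: hypothetical helpers showing the version 7 (u32 length
/// prefix) and version 8 (LEB128 length prefix) string-table layouts.

/// Version 7: the string is prefixed by its length as a native-endian u32.
fn write_v7(table: &mut Vec<u8>, s: &str) -> u32 {
    let offset = table.len() as u32;
    table.extend((s.len() as u32).to_ne_bytes());
    table.extend_from_slice(s.as_bytes());
    offset
}

/// Version 8: the string is prefixed by its length as an unsigned LEB128
/// varint, i.e. 1 byte instead of 4 for strings shorter than 128 bytes.
fn write_v8(table: &mut Vec<u8>, s: &str) -> u32 {
    let offset = table.len() as u32;
    let mut len = s.len();
    loop {
        let byte = (len & 0x7f) as u8;
        len >>= 7;
        if len == 0 {
            table.push(byte);
            break;
        }
        table.push(byte | 0x80);
    }
    table.extend_from_slice(s.as_bytes());
    offset
}

/// Reading back a version 8 entry (what `StringTable::read` does above).
fn read_v8(table: &[u8], offset: usize) -> Option<&str> {
    let mut len = 0usize;
    let mut shift = 0u32;
    let mut pos = offset;
    loop {
        let byte = *table.get(pos)?;
        pos += 1;
        len |= usize::from(byte & 0x7f) << shift;
        shift += 7;
        if byte & 0x80 == 0 {
            break;
        }
    }
    std::str::from_utf8(table.get(pos..pos + len)?).ok()
}

fn main() {
    let (mut v7, mut v8) = (Vec::new(), Vec::new());
    let off7 = write_v7(&mut v7, "src/lib.rs") as usize;
    let off8 = write_v8(&mut v8, "src/lib.rs") as usize;
    assert_eq!(read_v8(&v8, off8), Some("src/lib.rs"));
    // The 3 bytes saved per short string are where the smaller `string_bytes`
    // numbers in the updated test snapshots come from.
    assert_eq!(v7.len() - off7, 4 + "src/lib.rs".len());
    assert_eq!(v8.len() - off8, 1 + "src/lib.rs".len());
}
```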
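
Similarly, the removed `align_to_eight`/`align_buf` helpers and the `watto::align_to` / `Writer::align_to(8)` calls that replace them implement the same 8-byte padding rule between sections. A small sketch of that rule, with hypothetical helper names, assuming the buffer itself starts 8-byte aligned:

```rust
/// Hypothetical helpers; the real code now relies on `watto` for this.

/// Padding needed to bring `len` up to the next multiple of 8 (what the
/// removed `align_to_eight` computed).
fn padding_to_eight(len: usize) -> usize {
    (8 - len % 8) % 8
}

/// Writer side: pad the output with zero bytes so the next section starts on
/// an 8-byte boundary.
fn pad(out: &mut Vec<u8>) {
    let n = padding_to_eight(out.len());
    out.extend(std::iter::repeat(0u8).take(n));
}

/// Reader side: skip the padding the writer emitted after a section of
/// `consumed` bytes.
fn skip_padding(consumed: usize, rest: &[u8]) -> Option<&[u8]> {
    rest.get(padding_to_eight(consumed)..)
}

fn main() {
    // e.g. after the 68-byte ppdbcache header, 4 padding bytes follow
    assert_eq!(padding_to_eight(68), 4);
    assert_eq!(padding_to_eight(16), 0);

    let mut out = vec![0xAAu8; 68];
    pad(&mut out);
    assert_eq!(out.len(), 72);
    assert!(skip_padding(68, &out[68..]).unwrap().is_empty());
}
```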