Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
6245922
chore: Upgrade nightly to 2025-12-15
LukeMathWalker Dec 17, 2025
bc2b9aa
chore: Don't reset the filesystem ahead of UI test execution
LukeMathWalker Dec 18, 2025
a147a55
chore: Don't change the mtime of the diagnostic file if there were no…
LukeMathWalker Dec 18, 2025
d8c9032
chore: Create a chokepoint to apply cargo flags in the test runner
LukeMathWalker Dec 20, 2025
e9abd86
chore: Fix lints
LukeMathWalker Dec 20, 2025
971f2e3
chore: Upgrade dependencies
LukeMathWalker Dec 21, 2025
4852e3f
chore: Don't ignore generated app code in UI tests
LukeMathWalker Dec 21, 2025
f6ba604
chore: Ignore broken doc links we can't fix
LukeMathWalker Dec 21, 2025
86797ac
chore: Extract metadata computation in an auxiliary function
LukeMathWalker Dec 21, 2025
3c1cc63
perf: Index crate items in parallel
LukeMathWalker Dec 21, 2025
40ac905
perf: Don't recompute the feature graph
LukeMathWalker Dec 26, 2025
7e4e89c
perf: Convert JSON docs to the expected caching format in parallel
LukeMathWalker Dec 26, 2025
253d080
perf: Use bincode to encode items rather than JSON
LukeMathWalker Dec 26, 2025
9923e81
perf: Avoid unnecessary clones when retrieving crate docs from the cache
LukeMathWalker Dec 26, 2025
61d9bb1
perf: Compute cache key on the thread pool
LukeMathWalker Dec 26, 2025
4522eaf
chore: Ignore samply-generated profile.json files
LukeMathWalker Dec 26, 2025
f084c93
perf: Cache crate_id lookups
LukeMathWalker Dec 26, 2025
b2c73a2
perf: Avoid unnecessary allocations when decoding items
LukeMathWalker Dec 26, 2025
7056251
perf: Avoid long-tail execution for complex crates on a single thread
LukeMathWalker Dec 26, 2025
bb66737
perf: Tune SQLite and use native bincode encoding/decoding speed
LukeMathWalker Dec 28, 2025
829ed25
perf: Don't copy the cached items to an intermediate buffer
LukeMathWalker Dec 28, 2025
ecf94ce
chore: Support rkyv in our rustdoc_types fork
LukeMathWalker Dec 30, 2025
611c092
chore: Formatting
LukeMathWalker Dec 30, 2025
ed558ae
perf: Use rkyv to deserialize crate items from the cache
LukeMathWalker Dec 30, 2025
6b0f8e5
perf: Don't block the main thread waiting for cargo's fingerprint
LukeMathWalker Dec 30, 2025
871dd74
perf: Don't deserialize the entire import path <> id map.
LukeMathWalker Dec 30, 2025
d067e7e
perf: Use rkyv to deserialize paths, on a need-to-use basis
LukeMathWalker Jan 17, 2026
7983f33
chore: Fix CI checks
LukeMathWalker Jan 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
perf: Use rkyv to deserialize paths, on a need-to-use basis
  • Loading branch information
LukeMathWalker committed Jan 17, 2026
commit d067e7e0cfa38da5b0a3c8713b1c53beb0b29a5d
2 changes: 2 additions & 0 deletions compiler/pavex_rustdoc_types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,8 @@ pub struct Id(pub u32);
rkyv::Deserialize,
)]
#[rkyv(derive(Debug))]
#[rkyv(compare(PartialEq))]
#[repr(u8)]
#[serde(rename_all = "snake_case")]
pub enum ItemKind {
/// A module declaration, e.g. `mod foo;` or `mod foo {}`
Expand Down
31 changes: 18 additions & 13 deletions compiler/pavexc/src/rustdoc/compute/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ use crate::{
rustdoc::{
annotations::AnnotatedItems,
queries::{
CrateData, CrateItemIndex, ImportPath2Id, LazyCrateItemIndex, LazyImportPath2Id,
CrateData, CrateItemIndex, CrateItemPaths, ImportPath2Id, LazyCrateItemIndex,
LazyCrateItemPaths, LazyImportPath2Id,
},
},
};
Expand Down Expand Up @@ -380,7 +381,7 @@ impl ToolchainCache {
let krate = CacheEntry {
root_item_id,
external_crates: Cow::Borrowed(external_crates),
paths: Cow::Borrowed(paths),
paths: RkyvCowBytes::Borrowed(paths),
format_version,
items: RkyvCowBytes::Borrowed(items),
secondary_indexes: Some(SecondaryIndexes {
Expand Down Expand Up @@ -601,7 +602,7 @@ impl ThirdPartyCrateCache {
let krate = CacheEntry {
root_item_id,
external_crates: Cow::Borrowed(external_crates),
paths: Cow::Borrowed(paths),
paths: RkyvCowBytes::Borrowed(paths),
format_version,
items: RkyvCowBytes::Borrowed(items),
secondary_indexes,
Expand Down Expand Up @@ -786,7 +787,7 @@ impl ThirdPartyCrateCache {
pub(in crate::rustdoc) struct CacheEntry<'a> {
root_item_id: u32,
external_crates: Cow<'a, [u8]>,
paths: Cow<'a, [u8]>,
paths: RkyvCowBytes<'a>,
format_version: i64,
items: RkyvCowBytes<'a>,
secondary_indexes: Option<SecondaryIndexes<'a>>,
Expand Down Expand Up @@ -879,19 +880,26 @@ impl<'a> CacheEntry<'a> {
the same crate twice? This is a bug."
);
};
let CrateItemPaths::Eager(paths) = &crate_data.paths else {
anyhow::bail!(
"The crate item paths is not deserialized. Are we trying to cache \
the same crate twice? This is a bug."
);
};

// Serialize the items HashMap using rkyv for zero-copy deserialization later.
let items = rkyv::to_bytes::<rkyv::rancor::Error>(&index.index)
.map_err(|e| anyhow::anyhow!(e).context("Failed to serialize crate items with rkyv"))?;

let external_crates =
bincode::serde::encode_to_vec(&crate_data.external_crates, BINCODE_CONFIG)?;
let paths = bincode::serde::encode_to_vec(&crate_data.paths, BINCODE_CONFIG)?;
let paths = rkyv::to_bytes::<rkyv::rancor::Error>(&paths.paths).map_err(|e| {
anyhow::anyhow!(e).context("Failed to serialize item summaries with rkyv")
})?;

Ok(CacheEntry {
root_item_id: crate_data.root_item_id.0,
external_crates: Cow::Owned(external_crates),
paths: Cow::Owned(paths),
paths: RkyvCowBytes::Owned(paths),
format_version: crate_data.format_version as i64,
items: RkyvCowBytes::Owned(items),
secondary_indexes: None,
Expand All @@ -904,17 +912,14 @@ impl<'a> CacheEntry<'a> {
/// since it can be quite large and deserialization can be slow for large crates.
/// The item index is stored as rkyv-serialized bytes for zero-copy access.
pub(super) fn hydrate(self, package_id: PackageId) -> Result<RustdocCacheEntry, anyhow::Error> {
let paths = tracing::trace_span!("Deserialize paths")
.in_scope(|| bincode::decode_from_slice(&self.paths, BINCODE_CONFIG))
.context("Failed to deserialize paths")?
.0;

let crate_data = CrateData {
root_item_id: rustdoc_types::Id(self.root_item_id.to_owned()),
external_crates: bincode::decode_from_slice(&self.external_crates, BINCODE_CONFIG)
.context("Failed to deserialize external_crates")?
.0,
paths,
paths: CrateItemPaths::Lazy(LazyCrateItemPaths {
bytes: self.paths.into_owned(),
}),
format_version: self.format_version.try_into()?,
index: CrateItemIndex::Lazy(LazyCrateItemIndex {
bytes: self.items.into_owned(),
Expand Down
136 changes: 128 additions & 8 deletions compiler/pavexc/src/rustdoc/queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@ use guppy::{PackageId, Version};
use indexmap::IndexSet;
use rayon::iter::IntoParallelRefIterator;
use rkyv::collections::swiss_table::ArchivedHashMap;
use rkyv::hash::FxHasher64;
use rkyv::rancor::Panic;
use rkyv::string::ArchivedString;
use rkyv::util::AlignedVec;
use rkyv::vec::ArchivedVec;
use rustc_hash::FxHashMap;
use rustdoc_types::{
ArchivedId, ArchivedItem, ExternalCrate, Item, ItemEnum, ItemKind, ItemSummary, Visibility,
ArchivedId, ArchivedItem, ArchivedItemSummary, ExternalCrate, Item, ItemEnum, ItemKind,
ItemSummary, Visibility,
};
use tracing::Span;
use tracing_log_error::log_error;
Expand Down Expand Up @@ -989,13 +991,131 @@ pub(crate) struct CrateData {
pub external_crates: FxHashMap<u32, ExternalCrate>,
/// A mapping from the id of a type to its fully qualified path.
/// Primarily useful for foreign items that are being re-exported by this crate.
#[allow(clippy::disallowed_types)]
pub paths: FxHashMap<rustdoc_types::Id, ItemSummary>,
pub paths: CrateItemPaths,
/// The version of the JSON format used by rustdoc.
pub format_version: u32,
/// The index of all the items in the crate.
pub index: CrateItemIndex,
}

#[derive(Debug, Clone)]
/// A mapping from the id of a type to its fully qualified path.
///
/// Primarily useful for foreign items that are being re-exported by this crate.
///
/// The mapping exists in two representations: a fully materialized hash map
/// ([`EagerCrateItemPaths`]) and a buffer of rkyv-serialized bytes whose entries
/// are only deserialized when they are looked up ([`LazyCrateItemPaths`]).
pub(crate) enum CrateItemPaths {
/// All summaries are available in memory as an `FxHashMap`.
Eager(EagerCrateItemPaths),
/// Summaries are kept as rkyv-serialized bytes and read through their archived form.
Lazy(LazyCrateItemPaths),
}

impl CrateItemPaths {
    /// Look up the summary associated with `id`.
    ///
    /// The eager representation hands back a borrow of the stored summary.
    /// The lazy representation deserializes the archived entry on the spot and
    /// returns it as an owned value. `None` is returned when `id` is not present.
    pub fn get(&self, id: &rustdoc_types::Id) -> Option<Cow<'_, ItemSummary>> {
        match self {
            Self::Eager(eager) => {
                let summary = eager.paths.get(id)?;
                Some(Cow::Borrowed(summary))
            }
            Self::Lazy(lazy) => lazy.get_deserialized(id).map(Cow::Owned),
        }
    }

    /// Iterate over all `(id, summary)` pairs, regardless of the underlying representation.
    ///
    /// Summaries are exposed through [`ItemSummaryRef`], so iterating over the lazy
    /// representation does not require deserializing each entry up front.
    pub fn iter(&self) -> impl Iterator<Item = (rustdoc_types::Id, ItemSummaryRef<'_>)> {
        match self {
            Self::Eager(eager) => CrateItemPathsIter::Eager(eager.paths.iter()),
            Self::Lazy(lazy) => CrateItemPathsIter::Lazy(lazy.archived().iter()),
        }
    }
}

/// The iterator behind [`CrateItemPaths::iter`].
///
/// It wraps the native iterator of whichever representation is being traversed.
pub enum CrateItemPathsIter<'a> {
/// Iterates over the entries of the in-memory hash map.
Eager(std::collections::hash_map::Iter<'a, rustdoc_types::Id, ItemSummary>),
/// Iterates over the entries of the rkyv-archived hash map, without deserializing them.
Lazy(
rkyv::collections::swiss_table::map::Iter<'a, ArchivedId, ArchivedItemSummary, FxHasher64>,
),
}

/// A borrowed view over an item summary, independent of how the summary is stored.
///
/// Use the accessor methods (`crate_id`, `kind`, `path`) to read its fields
/// without caring about the underlying representation.
pub enum ItemSummaryRef<'a> {
/// A reference to a fully deserialized summary.
Eager(&'a ItemSummary),
/// A reference to a summary in its rkyv-archived form.
Lazy(&'a ArchivedItemSummary),
}

impl ItemSummaryRef<'_> {
    /// The id of the crate the summarized item belongs to.
    pub fn crate_id(&self) -> u32 {
        match self {
            Self::Eager(summary) => summary.crate_id,
            Self::Lazy(archived) => archived.crate_id.to_native(),
        }
    }

    /// The kind of the summarized item.
    pub fn kind(&self) -> ItemKind {
        match self {
            Self::Eager(summary) => summary.kind,
            Self::Lazy(archived) => {
                // `ItemKind` is `repr(u8)`, which is why converting it back from its
                // archived form is treated as infallible here.
                rkyv::deserialize::<_, rkyv::rancor::Infallible>(&archived.kind).unwrap()
            }
        }
    }

    /// The path segments of the summarized item.
    ///
    /// Borrowed for the eager representation; for the lazy one, every archived
    /// segment is copied into a freshly allocated `String`.
    pub fn path(&self) -> Cow<'_, [String]> {
        match self {
            Self::Eager(summary) => Cow::Borrowed(&summary.path),
            Self::Lazy(archived) => {
                let segments = archived
                    .path
                    .iter()
                    .map(|segment| segment.as_str().to_owned())
                    .collect();
                Cow::Owned(segments)
            }
        }
    }
}

impl<'a> Iterator for CrateItemPathsIter<'a> {
    type Item = (rustdoc_types::Id, ItemSummaryRef<'a>);

    /// Advance the wrapped iterator and normalize its output:
    /// the id is always yielded by value, the summary as an [`ItemSummaryRef`].
    fn next(&mut self) -> Option<Self::Item> {
        match self {
            Self::Eager(inner) => {
                let (id, summary) = inner.next()?;
                Some((*id, ItemSummaryRef::Eager(summary)))
            }
            Self::Lazy(inner) => {
                let (archived_id, archived_summary) = inner.next()?;
                // Convert the archived id back into its native representation.
                let id = rustdoc_types::Id(archived_id.0.to_native());
                Some((id, ItemSummaryRef::Lazy(archived_summary)))
            }
        }
    }
}

#[derive(Debug, Clone)]
/// See [`CrateItemPaths`] for more information.
///
/// This is the fully materialized representation: every summary is held in memory
/// and can be borrowed directly.
pub(crate) struct EagerCrateItemPaths {
/// The summary of each item, keyed by the item id.
// NOTE(review): presumably this map type is flagged by the project's clippy
// configuration and deliberately allowed here; the reason is not stated — confirm.
#[allow(clippy::disallowed_types)]
pub paths: FxHashMap<rustdoc_types::Id, ItemSummary>,
}

/// See [`CrateItemPaths`] for more information.
///
/// This is the lazy representation: the mapping is kept as serialized bytes and
/// individual summaries are deserialized only when they are requested.
#[derive(Debug, Clone)]
pub(crate) struct LazyCrateItemPaths {
/// The rkyv-serialized mapping, read as an archived hash map from
/// `ArchivedId` to `ArchivedItemSummary`.
pub(super) bytes: AlignedVec,
}

impl LazyCrateItemPaths {
    /// View the stored bytes as an archived hash map, without copying or deserializing.
    #[inline]
    fn archived(&self) -> &ArchivedHashMap<ArchivedId, ArchivedItemSummary> {
        type ArchivedPaths = ArchivedHashMap<ArchivedId, ArchivedItemSummary>;

        // SAFETY: `bytes` is expected to have been produced by rkyv when serializing
        // a valid `HashMap<Id, ItemSummary>`. No validation is performed here:
        // the cache is trusted to contain well-formed data.
        unsafe { rkyv::access_unchecked::<ArchivedPaths>(&self.bytes) }
    }

    /// Look up the archived summary for `id`, if there is one.
    ///
    /// The returned reference points into the serialized bytes.
    pub fn get(&self, id: &rustdoc_types::Id) -> Option<&ArchivedItemSummary> {
        let key = ArchivedId(id.0.into());
        self.archived().get(&key)
    }

    /// Look up the summary for `id` and deserialize it into an owned [`ItemSummary`].
    ///
    /// Returns `None` if `id` is not in the mapping.
    pub fn get_deserialized(&self, id: &rustdoc_types::Id) -> Option<ItemSummary> {
        self.get(id)
            .map(|archived| rkyv::deserialize::<ItemSummary, Panic>(archived).unwrap())
    }
}

#[derive(Debug, Clone)]
/// The index of all the items in the crate.
///
Expand Down Expand Up @@ -1107,7 +1227,7 @@ impl Crate {
index: CrateItemIndex::Eager(EagerCrateItemIndex { index: krate.index }),
external_crates: krate.external_crates,
format_version: krate.format_version,
paths: krate.paths,
paths: CrateItemPaths::Eager(EagerCrateItemPaths { paths: krate.paths }),
};
Self::index(crate_data, package_id, diagnostics)
}
Expand All @@ -1123,18 +1243,18 @@ impl Crate {
.iter()
.filter_map(|(id, summary)| {
// We only want types, no macros
if matches!(summary.kind, ItemKind::Macro | ItemKind::ProcDerive) {
if matches!(summary.kind(), ItemKind::Macro | ItemKind::ProcDerive) {
return None;
}
// We will index local items on our own.
// We don't get them from `paths` because it may include private items
// as well, and we don't have a way to figure out if an item is private
// or not from the summary info.
if summary.crate_id == 0 {
if summary.crate_id() == 0 {
return None;
}

Some((summary.path.clone(), id.to_owned()))
Some((summary.path().into_owned(), id.to_owned()))
})
.collect();

Expand Down Expand Up @@ -1312,7 +1432,7 @@ impl Crate {
fn get_summary_by_local_type_id(
&self,
id: &rustdoc_types::Id,
) -> Result<&rustdoc_types::ItemSummary, anyhow::Error> {
) -> Result<Cow<'_, rustdoc_types::ItemSummary>, anyhow::Error> {
self.core.krate.paths.get(id).ok_or_else(|| {
anyhow!(
"Failed to look up the type id `{}` in the rustdoc's path index for `{}`. \
Expand Down