From 89eb406e4751b80dada86c9c149a507ddd09e6ba Mon Sep 17 00:00:00 2001 From: Ed Page Date: Thu, 11 Jul 2024 13:18:25 -0500 Subject: [PATCH] Add more profiling traces Someone suggested I look at [zed](https://github.com/zed-industries/zed). It has some interesting performance characteristics compared to projects I looked at before: - A lot of workspace members - git dependencies - patches This adds traces that help provide more context for those scenarios when looking at them. Some of these areas might be of interest for optimizing: - On every invocation, we do a git2 `copy_to` call for each git checkout - On every invocation, we do a `du` on each git checkout - It'd be great to avoid parsing every manifest in a git checkout but we need to do a full parse to make sure we find all packages (see https://rust-lang.zulipchat.com/#narrow/stream/246057-t-cargo/topic/Redundant.20code.20in.20.60GitSouce.60.3F) - Its suspicious how much time we spend in the "poison" step of resolving when its a no-op --- src/cargo/core/global_cache_tracker.rs | 1 + src/cargo/core/registry.rs | 2 + src/cargo/core/workspace.rs | 4 + src/cargo/ops/cargo_read_manifest.rs | 1 + src/cargo/ops/resolve.rs | 176 +++++++++++++------------ src/cargo/sources/git/utils.rs | 1 + 6 files changed, 102 insertions(+), 83 deletions(-) diff --git a/src/cargo/core/global_cache_tracker.rs b/src/cargo/core/global_cache_tracker.rs index 9add10f26a1..89ee32c8466 100644 --- a/src/cargo/core/global_cache_tracker.rs +++ b/src/cargo/core/global_cache_tracker.rs @@ -1799,6 +1799,7 @@ pub fn is_silent_error(e: &anyhow::Error) -> bool { } /// Returns the disk usage for a git checkout directory. +#[tracing::instrument] pub fn du_git_checkout(path: &Path) -> CargoResult { // !.git is used because clones typically use hardlinks for the git // contents. TODO: Verify behavior on Windows. diff --git a/src/cargo/core/registry.rs b/src/cargo/core/registry.rs index a61826f6537..780e0c4fe71 100644 --- a/src/cargo/core/registry.rs +++ b/src/cargo/core/registry.rs @@ -336,6 +336,7 @@ impl<'gctx> PackageRegistry<'gctx> { /// The return value is a `Vec` of patches that should *not* be locked. /// This happens when the patch is locked, but the patch has been updated /// so the locked value is no longer correct. + #[tracing::instrument(skip(self, patch_deps))] pub fn patch( &mut self, url: &Url, @@ -784,6 +785,7 @@ impl<'gctx> Registry for PackageRegistry<'gctx> { } } + #[tracing::instrument(skip_all)] fn block_until_ready(&mut self) -> CargoResult<()> { if cfg!(debug_assertions) { // Force borrow to catch invalid borrows, regardless of which source is used and how it diff --git a/src/cargo/core/workspace.rs b/src/cargo/core/workspace.rs index d1999aafc2f..4ac8777bd62 100644 --- a/src/cargo/core/workspace.rs +++ b/src/cargo/core/workspace.rs @@ -723,6 +723,7 @@ impl<'gctx> Workspace<'gctx> { /// verifies that those are all valid packages to point to. Otherwise, this /// will transitively follow all `path` dependencies looking for members of /// the workspace. + #[tracing::instrument(skip_all)] fn find_members(&mut self) -> CargoResult<()> { let Some(workspace_config) = self.load_workspace_config()? else { debug!("find_members - only me as a member"); @@ -886,6 +887,7 @@ impl<'gctx> Workspace<'gctx> { /// 1. A workspace only has one root. /// 2. All workspace members agree on this one root as the root. /// 3. The current crate is a member of this workspace. + #[tracing::instrument(skip_all)] fn validate(&mut self) -> CargoResult<()> { // The rest of the checks require a VirtualManifest or multiple members. if self.root_manifest.is_none() { @@ -954,6 +956,7 @@ impl<'gctx> Workspace<'gctx> { } } + #[tracing::instrument(skip_all)] fn validate_members(&mut self) -> CargoResult<()> { for member in self.members.clone() { let root = self.find_root(&member)?; @@ -1812,6 +1815,7 @@ impl WorkspaceRootConfig { self.members.is_some() } + #[tracing::instrument(skip_all)] fn members_paths(&self, globs: &[String]) -> CargoResult> { let mut expanded_list = Vec::new(); diff --git a/src/cargo/ops/cargo_read_manifest.rs b/src/cargo/ops/cargo_read_manifest.rs index e8eaf1b43f9..960974ebece 100644 --- a/src/cargo/ops/cargo_read_manifest.rs +++ b/src/cargo/ops/cargo_read_manifest.rs @@ -34,6 +34,7 @@ pub fn read_package( Ok(Package::new(manifest, path)) } +#[tracing::instrument(skip(source_id, gctx))] pub fn read_packages( path: &Path, source_id: SourceId, diff --git a/src/cargo/ops/resolve.rs b/src/cargo/ops/resolve.rs index c41f9efb447..65b3f2b6814 100644 --- a/src/cargo/ops/resolve.rs +++ b/src/cargo/ops/resolve.rs @@ -387,6 +387,7 @@ pub fn resolve_with_previous<'gctx>( register_previous_locks(ws, registry, r, &keep, dev_deps); // Prefer to use anything in the previous lock file, aka we want to have conservative updates. + let _span = tracing::span!(tracing::Level::TRACE, "prefer_package_id").entered(); for id in r.iter().filter(keep) { debug!("attempting to prefer {}", id); version_prefs.prefer_package_id(id); @@ -397,20 +398,22 @@ pub fn resolve_with_previous<'gctx>( registry.lock_patches(); } - let summaries: Vec<(Summary, ResolveOpts)> = ws - .members_with_features(specs, cli_features)? - .into_iter() - .map(|(member, features)| { - let summary = registry.lock(member.summary().clone()); - ( - summary, - ResolveOpts { - dev_deps, - features: RequestedFeatures::CliFeatures(features), - }, - ) - }) - .collect(); + let summaries: Vec<(Summary, ResolveOpts)> = { + let _span = tracing::span!(tracing::Level::TRACE, "registry.lock").entered(); + ws.members_with_features(specs, cli_features)? + .into_iter() + .map(|(member, features)| { + let summary = registry.lock(member.summary().clone()); + ( + summary, + ResolveOpts { + dev_deps, + features: RequestedFeatures::CliFeatures(features), + }, + ) + }) + .collect() + }; let replace = lock_replacements(ws, previous, &keep); @@ -498,6 +501,7 @@ pub fn get_resolved_packages<'gctx>( /// /// Note that this function, at the time of this writing, is basically the /// entire fix for issue #4127. +#[tracing::instrument(skip_all)] fn register_previous_locks( ws: &Workspace<'_>, registry: &mut PackageRegistry<'_>, @@ -578,60 +582,63 @@ fn register_previous_locks( // crates from crates.io* are not locked (aka added to `avoid_locking`). // For dependencies like `log` their previous version in the lock file will // come up first before newer version, if newer version are available. - let mut path_deps = ws.members().cloned().collect::>(); - let mut visited = HashSet::new(); - while let Some(member) = path_deps.pop() { - if !visited.insert(member.package_id()) { - continue; - } - let is_ws_member = ws.is_member(&member); - for dep in member.dependencies() { - // If this dependency didn't match anything special then we may want - // to poison the source as it may have been added. If this path - // dependencies is **not** a workspace member, however, and it's an - // optional/non-transitive dependency then it won't be necessarily - // be in our lock file. If this shows up then we avoid poisoning - // this source as otherwise we'd repeatedly update the registry. - // - // TODO: this breaks adding an optional dependency in a - // non-workspace member and then simultaneously editing the - // dependency on that crate to enable the feature. For now, - // this bug is better than the always-updating registry though. - if !is_ws_member && (dep.is_optional() || !dep.is_transitive()) { + { + let _span = tracing::span!(tracing::Level::TRACE, "poison").entered(); + let mut path_deps = ws.members().cloned().collect::>(); + let mut visited = HashSet::new(); + while let Some(member) = path_deps.pop() { + if !visited.insert(member.package_id()) { continue; } + let is_ws_member = ws.is_member(&member); + for dep in member.dependencies() { + // If this dependency didn't match anything special then we may want + // to poison the source as it may have been added. If this path + // dependencies is **not** a workspace member, however, and it's an + // optional/non-transitive dependency then it won't be necessarily + // be in our lock file. If this shows up then we avoid poisoning + // this source as otherwise we'd repeatedly update the registry. + // + // TODO: this breaks adding an optional dependency in a + // non-workspace member and then simultaneously editing the + // dependency on that crate to enable the feature. For now, + // this bug is better than the always-updating registry though. + if !is_ws_member && (dep.is_optional() || !dep.is_transitive()) { + continue; + } - // If dev-dependencies aren't being resolved, skip them. - if !dep.is_transitive() && !dev_deps { - continue; - } + // If dev-dependencies aren't being resolved, skip them. + if !dep.is_transitive() && !dev_deps { + continue; + } - // If this is a path dependency, then try to push it onto our - // worklist. - if let Some(pkg) = path_pkg(dep.source_id()) { - path_deps.push(pkg); - continue; - } + // If this is a path dependency, then try to push it onto our + // worklist. + if let Some(pkg) = path_pkg(dep.source_id()) { + path_deps.push(pkg); + continue; + } - // If we match *anything* in the dependency graph then we consider - // ourselves all ok, and assume that we'll resolve to that. - if resolve.iter().any(|id| dep.matches_ignoring_source(id)) { - continue; - } + // If we match *anything* in the dependency graph then we consider + // ourselves all ok, and assume that we'll resolve to that. + if resolve.iter().any(|id| dep.matches_ignoring_source(id)) { + continue; + } - // Ok if nothing matches, then we poison the source of these - // dependencies and the previous lock file. - debug!( - "poisoning {} because {} looks like it changed {}", - dep.source_id(), - member.package_id(), - dep.package_name() - ); - for id in resolve - .iter() - .filter(|id| id.source_id() == dep.source_id()) - { - add_deps(resolve, id, &mut avoid_locking); + // Ok if nothing matches, then we poison the source of these + // dependencies and the previous lock file. + debug!( + "poisoning {} because {} looks like it changed {}", + dep.source_id(), + member.package_id(), + dep.package_name() + ); + for id in resolve + .iter() + .filter(|id| id.source_id() == dep.source_id()) + { + add_deps(resolve, id, &mut avoid_locking); + } } } } @@ -661,28 +668,31 @@ fn register_previous_locks( let keep = |id: &PackageId| keep(id) && !avoid_locking.contains(id); registry.clear_lock(); - for node in resolve.iter().filter(keep) { - let deps = resolve - .deps_not_replaced(node) - .map(|p| p.0) - .filter(keep) - .collect::>(); - - // In the v2 lockfile format and prior the `branch=master` dependency - // directive was serialized the same way as the no-branch-listed - // directive. Nowadays in Cargo, however, these two directives are - // considered distinct and are no longer represented the same way. To - // maintain compatibility with older lock files we register locked nodes - // for *both* the master branch and the default branch. - // - // Note that this is only applicable for loading older resolves now at - // this point. All new lock files are encoded as v3-or-later, so this is - // just compat for loading an old lock file successfully. - if let Some(node) = master_branch_git_source(node, resolve) { - registry.register_lock(node, deps.clone()); - } + { + let _span = tracing::span!(tracing::Level::TRACE, "register_lock").entered(); + for node in resolve.iter().filter(keep) { + let deps = resolve + .deps_not_replaced(node) + .map(|p| p.0) + .filter(keep) + .collect::>(); + + // In the v2 lockfile format and prior the `branch=master` dependency + // directive was serialized the same way as the no-branch-listed + // directive. Nowadays in Cargo, however, these two directives are + // considered distinct and are no longer represented the same way. To + // maintain compatibility with older lock files we register locked nodes + // for *both* the master branch and the default branch. + // + // Note that this is only applicable for loading older resolves now at + // this point. All new lock files are encoded as v3-or-later, so this is + // just compat for loading an old lock file successfully. + if let Some(node) = master_branch_git_source(node, resolve) { + registry.register_lock(node, deps.clone()); + } - registry.register_lock(node, deps); + registry.register_lock(node, deps); + } } /// Recursively add `node` and all its transitive dependencies to `set`. diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index 5ade00902ed..844a4dcf58b 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -164,6 +164,7 @@ impl GitRemote { impl GitDatabase { /// Checkouts to a revision at `dest`ination from this database. + #[tracing::instrument(skip(self, gctx))] pub fn copy_to( &self, rev: git2::Oid,