diff --git a/src/cargo/sources/registry/index.rs b/src/cargo/sources/registry/index.rs index bbd7e226ca7..026f20204a9 100644 --- a/src/cargo/sources/registry/index.rs +++ b/src/cargo/sources/registry/index.rs @@ -1,14 +1,15 @@ use std::collections::HashMap; -use std::io::prelude::*; -use std::fs::File; use std::path::Path; +use std::str; use serde_json; use core::dependency::{Dependency, DependencyInner, Kind}; use core::{SourceId, Summary, PackageId, Registry}; use sources::registry::{RegistryPackage, RegistryDependency, INDEX_LOCK}; +use sources::registry::RegistryData; use util::{CargoResult, ChainError, internal, Filesystem, Config}; +use util::human; pub struct RegistryIndex<'cfg> { source_id: SourceId, @@ -23,7 +24,8 @@ impl<'cfg> RegistryIndex<'cfg> { pub fn new(id: &SourceId, path: &Filesystem, config: &'cfg Config, - locked: bool) -> RegistryIndex<'cfg> { + locked: bool) + -> RegistryIndex<'cfg> { RegistryIndex { source_id: id.clone(), path: path.clone(), @@ -35,13 +37,16 @@ impl<'cfg> RegistryIndex<'cfg> { } /// Return the hash listed for a specified PackageId. - pub fn hash(&mut self, pkg: &PackageId) -> CargoResult { + pub fn hash(&mut self, + pkg: &PackageId, + load: &mut RegistryData) + -> CargoResult { let key = (pkg.name().to_string(), pkg.version().to_string()); if let Some(s) = self.hashes.get(&key) { return Ok(s.clone()) } // Ok, we're missing the key, so parse the index file to load it. - self.summaries(pkg.name())?; + self.summaries(pkg.name(), load)?; self.hashes.get(&key).chain_error(|| { internal(format!("no hash listed for {}", pkg)) }).map(|s| s.clone()) @@ -51,11 +56,14 @@ impl<'cfg> RegistryIndex<'cfg> { /// /// Returns a list of pairs of (summary, yanked) for the package name /// specified. - pub fn summaries(&mut self, name: &str) -> CargoResult<&Vec<(Summary, bool)>> { + pub fn summaries(&mut self, + name: &str, + load: &mut RegistryData) + -> CargoResult<&Vec<(Summary, bool)>> { if self.cache.contains_key(name) { return Ok(&self.cache[name]); } - let summaries = self.load_summaries(name)?; + let summaries = self.load_summaries(name, load)?; let summaries = summaries.into_iter().filter(|summary| { summary.0.package_id().name() == name }).collect(); @@ -63,8 +71,11 @@ impl<'cfg> RegistryIndex<'cfg> { Ok(&self.cache[name]) } - fn load_summaries(&mut self, name: &str) -> CargoResult> { - let (path, _lock) = if self.locked { + fn load_summaries(&mut self, + name: &str, + load: &mut RegistryData) + -> CargoResult> { + let (root, _lock) = if self.locked { let lock = self.path.open_ro(Path::new(INDEX_LOCK), self.config, "the registry index"); @@ -84,25 +95,32 @@ impl<'cfg> RegistryIndex<'cfg> { // see module comment for why this is structured the way it is let path = match fs_name.len() { - 1 => path.join("1").join(&fs_name), - 2 => path.join("2").join(&fs_name), - 3 => path.join("3").join(&fs_name[..1]).join(&fs_name), - _ => path.join(&fs_name[0..2]) - .join(&fs_name[2..4]) - .join(&fs_name), + 1 => format!("1/{}", fs_name), + 2 => format!("2/{}", fs_name), + 3 => format!("3/{}/{}", &fs_name[..1], fs_name), + _ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name), + // 1 => Path::new("1").join(fs_name), + // 2 => Path::new("2").join(fs_name), + // 3 => Path::new("3").join(&fs_name[..1]).join(fs_name), + // _ => Path::new(&fs_name[0..2]).join(&fs_name[2..4]).join(fs_name), }; - match File::open(&path) { - Ok(mut f) => { - let mut contents = String::new(); - f.read_to_string(&mut contents)?; - let ret: CargoResult>; - ret = contents.lines().filter(|l| !l.trim().is_empty()) - .map(|l| self.parse_registry_package(l)) - .collect(); - ret.chain_error(|| { - internal(format!("failed to parse registry's information \ - for: {}", name)) - }) + match load.load(&root, Path::new(&path)) { + Ok(contents) => { + let contents = str::from_utf8(&contents).map_err(|_| { + human("registry index file was not valid utf-8") + })?; + let lines = contents.lines() + .map(|s| s.trim()) + .filter(|l| !l.is_empty()); + + // Attempt forwards-compatibility on the index by ignoring + // everything that we ourselves don't understand, that should + // allow future cargo implementations to break the + // interpretation of each line here and older cargo will simply + // ignore the new lines. + Ok(lines.filter_map(|line| { + self.parse_registry_package(line).ok() + }).collect()) } Err(..) => Ok(Vec::new()), } @@ -161,12 +179,13 @@ impl<'cfg> RegistryIndex<'cfg> { .set_kind(kind) .into_dependency()) } -} -impl<'cfg> Registry for RegistryIndex<'cfg> { - fn query(&mut self, dep: &Dependency) -> CargoResult> { + pub fn query(&mut self, + dep: &Dependency, + load: &mut RegistryData) + -> CargoResult> { let mut summaries = { - let summaries = self.summaries(dep.name())?; + let summaries = self.summaries(dep.name(), load)?; summaries.iter().filter(|&&(_, yanked)| { dep.source_id().precise().is_some() || !yanked }).map(|s| s.0.clone()).collect::>() @@ -188,8 +207,4 @@ impl<'cfg> Registry for RegistryIndex<'cfg> { }); summaries.query(dep) } - - fn supports_checksums(&self) -> bool { - true - } } diff --git a/src/cargo/sources/registry/local.rs b/src/cargo/sources/registry/local.rs index 67e88a9c72d..fa4cbcd82d1 100644 --- a/src/cargo/sources/registry/local.rs +++ b/src/cargo/sources/registry/local.rs @@ -6,8 +6,9 @@ use rustc_serialize::hex::ToHex; use core::PackageId; use sources::registry::{RegistryData, RegistryConfig}; -use util::{Config, CargoResult, ChainError, human, Sha256, Filesystem}; use util::FileLock; +use util::paths; +use util::{Config, CargoResult, ChainError, human, Sha256, Filesystem}; pub struct LocalRegistry<'cfg> { index_path: Filesystem, @@ -34,7 +35,11 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> { &self.index_path } - fn config(&self) -> CargoResult> { + fn load(&self, root: &Path, path: &Path) -> CargoResult> { + paths::read_bytes(&root.join(path)) + } + + fn config(&mut self) -> CargoResult> { // Local registries don't have configuration for remote APIs or anything // like that Ok(None) diff --git a/src/cargo/sources/registry/mod.rs b/src/cargo/sources/registry/mod.rs index 2044c30ebfe..2af4ad9719f 100644 --- a/src/cargo/sources/registry/mod.rs +++ b/src/cargo/sources/registry/mod.rs @@ -219,7 +219,8 @@ struct RegistryDependency { pub trait RegistryData { fn index_path(&self) -> &Filesystem; - fn config(&self) -> CargoResult>; + fn load(&self, root: &Path, path: &Path) -> CargoResult>; + fn config(&mut self) -> CargoResult>; fn update_index(&mut self) -> CargoResult<()>; fn download(&mut self, pkg: &PackageId, @@ -274,7 +275,7 @@ impl<'cfg> RegistrySource<'cfg> { /// Decode the configuration stored within the registry. /// /// This requires that the index has been at least checked out. - pub fn config(&self) -> CargoResult> { + pub fn config(&mut self) -> CargoResult> { self.ops.config() } @@ -323,12 +324,12 @@ impl<'cfg> Registry for RegistrySource<'cfg> { // come back with no summaries, then our registry may need to be // updated, so we fall back to performing a lazy update. if dep.source_id().precise().is_some() && !self.updated { - if self.index.query(dep)?.is_empty() { + if self.index.query(dep, &mut *self.ops)?.is_empty() { self.do_update()?; } } - self.index.query(dep) + self.index.query(dep, &mut *self.ops) } fn supports_checksums(&self) -> bool { @@ -356,7 +357,7 @@ impl<'cfg> Source for RegistrySource<'cfg> { } fn download(&mut self, package: &PackageId) -> CargoResult { - let hash = self.index.hash(package)?; + let hash = self.index.hash(package, &mut *self.ops)?; let path = self.ops.download(package, &hash)?; let path = self.unpack_package(package, &path).chain_error(|| { internal(format!("failed to unpack package `{}`", package)) @@ -369,7 +370,7 @@ impl<'cfg> Source for RegistrySource<'cfg> { // differ due to historical Cargo bugs. To paper over these we trash the // *summary* loaded from the Cargo.toml we just downloaded with the one // we loaded from the index. - let summaries = self.index.summaries(package.name())?; + let summaries = self.index.summaries(package.name(), &mut *self.ops)?; let summary = summaries.iter().map(|s| &s.0).find(|s| { s.package_id() == package }).expect("summary not found"); diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs index 7408eb45060..78c9cb5b5fa 100644 --- a/src/cargo/sources/registry/remote.rs +++ b/src/cargo/sources/registry/remote.rs @@ -1,5 +1,7 @@ +use std::cell::{RefCell, Ref, Cell}; use std::io::SeekFrom; use std::io::prelude::*; +use std::mem; use std::path::Path; use curl::easy::{Easy, List}; @@ -13,8 +15,7 @@ use ops; use sources::git; use sources::registry::{RegistryData, RegistryConfig, INDEX_LOCK}; use util::network; -use util::paths; -use util::{FileLock, Filesystem}; +use util::{FileLock, Filesystem, LazyCell}; use util::{Config, CargoResult, ChainError, human, Sha256, ToUrl}; use util::errors::HttpError; @@ -23,7 +24,10 @@ pub struct RemoteRegistry<'cfg> { cache_path: Filesystem, source_id: SourceId, config: &'cfg Config, - handle: Option, + handle: LazyCell>, + tree: RefCell>>, + repo: LazyCell, + head: Cell>, } impl<'cfg> RemoteRegistry<'cfg> { @@ -34,9 +38,74 @@ impl<'cfg> RemoteRegistry<'cfg> { cache_path: config.registry_cache_path().join(name), source_id: source_id.clone(), config: config, - handle: None, + tree: RefCell::new(None), + handle: LazyCell::new(), + repo: LazyCell::new(), + head: Cell::new(None), } } + + fn easy(&self) -> CargoResult<&RefCell> { + self.handle.get_or_try_init(|| { + ops::http_handle(self.config).map(RefCell::new) + }) + } + + fn repo(&self) -> CargoResult<&git2::Repository> { + self.repo.get_or_try_init(|| { + let path = self.index_path.clone().into_path_unlocked(); + + match git2::Repository::open(&path) { + Ok(repo) => Ok(repo), + Err(_) => { + self.index_path.create_dir()?; + let lock = self.index_path.open_rw(Path::new(INDEX_LOCK), + self.config, + "the registry index")?; + let _ = lock.remove_siblings(); + Ok(git2::Repository::init_bare(&path)?) + } + } + }) + } + + fn head(&self) -> CargoResult { + if self.head.get().is_none() { + let oid = self.repo()?.refname_to_id("refs/remotes/origin/master")?; + self.head.set(Some(oid)); + } + Ok(self.head.get().unwrap()) + } + + fn tree(&self) -> CargoResult> { + { + let tree = self.tree.borrow(); + if tree.is_some() { + return Ok(Ref::map(tree, |s| s.as_ref().unwrap())) + } + } + let repo = self.repo()?; + let commit = repo.find_commit(self.head()?)?; + let tree = commit.tree()?; + + // Unfortunately in libgit2 the tree objects look like they've got a + // reference to the repository object which means that a tree cannot + // outlive the repository that it came from. Here we want to cache this + // tree, though, so to accomplish this we transmute it to a static + // lifetime. + // + // Note that we don't actually hand out the static lifetime, instead we + // only return a scoped one from this function. Additionally the repo + // we loaded from (above) lives as long as this object + // (`RemoteRegistry`) so we then just need to ensure that the tree is + // destroyed first in the destructor, hence the destructor on + // `RemoteRegistry` below. + let tree = unsafe { + mem::transmute::>(tree) + }; + *self.tree.borrow_mut() = Some(tree); + Ok(Ref::map(self.tree.borrow(), |s| s.as_ref().unwrap())) + } } impl<'cfg> RegistryData for RemoteRegistry<'cfg> { @@ -44,13 +113,28 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { &self.index_path } - fn config(&self) -> CargoResult> { - let lock = self.index_path.open_ro(Path::new(INDEX_LOCK), - self.config, - "the registry index")?; - let path = lock.path().parent().unwrap(); - let contents = paths::read(&path.join("config.json"))?; - let config = serde_json::from_str(&contents)?; + fn load(&self, _root: &Path, path: &Path) -> CargoResult> { + // Note that the index calls this method and the filesystem is locked + // in the index, so we don't need to worry about an `update_index` + // happening in a different process. + let repo = self.repo()?; + let tree = self.tree()?; + let entry = tree.get_path(path)?; + let object = entry.to_object(&repo)?; + let blob = match object.as_blob() { + Some(blob) => blob, + None => bail!("path `{}` is not a blob in the git repo", path.display()), + }; + Ok(blob.content().to_vec()) + } + + fn config(&mut self) -> CargoResult> { + self.repo()?; // create intermediate dirs and initialize the repo + let _lock = self.index_path.open_ro(Path::new(INDEX_LOCK), + self.config, + "the registry index")?; + let json = self.load(Path::new(""), Path::new("config.json"))?; + let config = serde_json::from_slice(&json)?; Ok(Some(config)) } @@ -63,53 +147,33 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { // hit the index, which may not actually read this configuration. ops::http_handle(self.config)?; - // Then we actually update the index - self.index_path.create_dir()?; - let lock = self.index_path.open_rw(Path::new(INDEX_LOCK), - self.config, - "the registry index")?; - let path = lock.path().parent().unwrap(); - + let repo = self.repo()?; + let _lock = self.index_path.open_rw(Path::new(INDEX_LOCK), + self.config, + "the registry index")?; self.config.shell().status("Updating", format!("registry `{}`", self.source_id.url()))?; - let repo = git2::Repository::open(path).or_else(|_| { - let _ = lock.remove_siblings(); - git2::Repository::init(path) - })?; - if self.source_id.url().host_str() == Some("github.com") { - if let Ok(oid) = repo.refname_to_id("refs/heads/master") { - let handle = match self.handle { - Some(ref mut handle) => handle, - None => { - self.handle = Some(ops::http_handle(self.config)?); - self.handle.as_mut().unwrap() - } - }; + if let Ok(oid) = self.head() { + let mut handle = self.easy()?.borrow_mut(); debug!("attempting github fast path for {}", self.source_id.url()); - if github_up_to_date(handle, self.source_id.url(), &oid) { + if github_up_to_date(&mut handle, self.source_id.url(), &oid) { return Ok(()) } debug!("fast path failed, falling back to a git fetch"); } } - // git fetch origin + // git fetch origin master let url = self.source_id.url().to_string(); let refspec = "refs/heads/master:refs/remotes/origin/master"; - git::fetch(&repo, &url, refspec, self.config).chain_error(|| { human(format!("failed to fetch `{}`", url)) })?; - - // git reset --hard origin/master - let reference = "refs/remotes/origin/master"; - let oid = repo.refname_to_id(reference)?; - trace!("[{}] updating to rev {}", self.source_id, oid); - let object = repo.find_object(oid, None)?; - repo.reset(&object, git2::ResetType::Hard, None)?; + self.head.set(None); + *self.tree.borrow_mut() = None; Ok(()) } @@ -144,17 +208,11 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { .push(&pkg.version().to_string()) .push("download"); - let handle = match self.handle { - Some(ref mut handle) => handle, - None => { - self.handle = Some(ops::http_handle(self.config)?); - self.handle.as_mut().unwrap() - } - }; // TODO: don't download into memory, but ensure that if we ctrl-c a // download we should resume either from the start or the middle // on the next time let url = url.to_string(); + let mut handle = self.easy()?.borrow_mut(); handle.get(true)?; handle.url(&url)?; handle.follow_location(true)?; @@ -192,6 +250,13 @@ impl<'cfg> RegistryData for RemoteRegistry<'cfg> { } } +impl<'cfg> Drop for RemoteRegistry<'cfg> { + fn drop(&mut self) { + // Just be sure to drop this before our other fields + self.tree.borrow_mut().take(); + } +} + /// Updating the index is done pretty regularly so we want it to be as fast as /// possible. For registries hosted on github (like the crates.io index) there's /// a fast path available to use [1] to tell us that there's no updates to be