From dda6f2af126ca06d653e7d613232fee5b804d680 Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Mon, 16 Oct 2023 23:57:40 +0200 Subject: [PATCH] Separate registry-api from the index, use the configured base url --- src/bin/cratesfyi.rs | 17 ++++++---- src/config.rs | 8 +++-- src/context.rs | 5 ++- src/db/add_package.rs | 2 +- src/db/delete.rs | 2 +- src/docbuilder/rustwide_builder.rs | 11 +++--- src/{index/mod.rs => index.rs} | 48 ++++----------------------- src/index/crates.rs | 0 src/lib.rs | 2 ++ src/{index/api.rs => registry_api.rs} | 18 ++++------ src/test/fakes.rs | 2 +- src/test/mod.rs | 25 +++++++++++--- src/web/crate_details.rs | 2 +- src/web/releases.rs | 19 +++++------ 14 files changed, 74 insertions(+), 87 deletions(-) rename src/{index/mod.rs => index.rs} (59%) delete mode 100644 src/index/crates.rs rename src/{index/api.rs => registry_api.rs} (92%) diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs index 92e38fc8f..39349b067 100644 --- a/src/bin/cratesfyi.rs +++ b/src/bin/cratesfyi.rs @@ -16,7 +16,8 @@ use docs_rs::utils::{ }; use docs_rs::{ start_background_metrics_webserver, start_web_server, AsyncStorage, BuildQueue, Config, - Context, Index, InstanceMetrics, PackageKind, RustwideBuilder, ServiceMetrics, Storage, + Context, Index, InstanceMetrics, PackageKind, RegistryApi, RustwideBuilder, ServiceMetrics, + Storage, }; use humantime::Duration; use once_cell::sync::OnceCell; @@ -537,12 +538,10 @@ impl DatabaseSubcommand { } Self::UpdateCrateRegistryFields { name } => { - let index = ctx.index()?; - db::update_crate_data_in_database( &mut *ctx.conn()?, &name, - &index.api().get_crate_data(&name)?, + &ctx.registry_api()?.get_crate_data(&name)?, )?; } @@ -719,6 +718,7 @@ struct BinContext { service_metrics: OnceCell>, instance_metrics: OnceCell>, index: OnceCell>, + registry_api: OnceCell>, repository_stats_updater: OnceCell>, runtime: OnceCell>, } @@ -734,6 +734,7 @@ impl BinContext { service_metrics: OnceCell::new(), instance_metrics: OnceCell::new(), index: OnceCell::new(), + registry_api: OnceCell::new(), repository_stats_updater: OnceCell::new(), runtime: OnceCell::new(), } @@ -783,11 +784,15 @@ impl Context for BinContext { let config = self.config()?; let path = config.registry_index_path.clone(); if let Some(registry_url) = config.registry_url.clone() { - Index::from_url(path, registry_url, config.crates_io_api_call_retries) + Index::from_url(path, registry_url) } else { - Index::new(path, config.crates_io_api_call_retries) + Index::new(path) }? }; + fn registry_api(self) -> RegistryApi = { + let config = self.config()?; + RegistryApi::new(config.registry_api_host.clone(), config.crates_io_api_call_retries)? + }; fn repository_stats_updater(self) -> RepositoryStatsUpdater = { let config = self.config()?; let pool = self.pool()?; diff --git a/src/config.rs b/src/config.rs index f1cea68bf..4fe7814cc 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2,13 +2,14 @@ use crate::{cdn::CdnKind, storage::StorageKind}; use anyhow::{anyhow, bail, Context, Result}; use std::{env::VarError, error::Error, path::PathBuf, str::FromStr, time::Duration}; use tracing::trace; +use url::Url; #[derive(Debug)] pub struct Config { pub prefix: PathBuf, pub registry_index_path: PathBuf, pub registry_url: Option, - pub registry_api_host: String, + pub registry_api_host: Url, // Database connection params pub(crate) database_url: String, @@ -140,7 +141,10 @@ impl Config { registry_index_path: env("REGISTRY_INDEX_PATH", prefix.join("crates.io-index"))?, registry_url: maybe_env("REGISTRY_URL")?, - registry_api_host: env("DOCSRS_REGISTRY_API_HOST", "https://crates.io".into())?, + registry_api_host: env( + "DOCSRS_REGISTRY_API_HOST", + "https://crates.io".parse().unwrap(), + )?, prefix: prefix.clone(), database_url: require_env("DOCSRS_DATABASE_URL")?, diff --git a/src/context.rs b/src/context.rs index 20c274903..5c0516035 100644 --- a/src/context.rs +++ b/src/context.rs @@ -2,7 +2,9 @@ use crate::cdn::CdnBackend; use crate::db::Pool; use crate::error::Result; use crate::repositories::RepositoryStatsUpdater; -use crate::{AsyncStorage, BuildQueue, Config, Index, InstanceMetrics, ServiceMetrics, Storage}; +use crate::{ + AsyncStorage, BuildQueue, Config, Index, InstanceMetrics, RegistryApi, ServiceMetrics, Storage, +}; use std::sync::Arc; use tokio::runtime::Runtime; @@ -16,6 +18,7 @@ pub trait Context { fn service_metrics(&self) -> Result>; fn instance_metrics(&self) -> Result>; fn index(&self) -> Result>; + fn registry_api(&self) -> Result>; fn repository_stats_updater(&self) -> Result>; fn runtime(&self) -> Result>; } diff --git a/src/db/add_package.rs b/src/db/add_package.rs index 1ba775c9a..5ef8a12d7 100644 --- a/src/db/add_package.rs +++ b/src/db/add_package.rs @@ -2,7 +2,7 @@ use crate::{ db::types::Feature, docbuilder::{BuildResult, DocCoverage}, error::Result, - index::api::{CrateData, CrateOwner, ReleaseData}, + registry_api::{CrateData, CrateOwner, ReleaseData}, storage::CompressionAlgorithm, utils::MetadataPackage, web::crate_details::CrateDetails, diff --git a/src/db/delete.rs b/src/db/delete.rs index 7967bef7b..a218dd810 100644 --- a/src/db/delete.rs +++ b/src/db/delete.rs @@ -194,7 +194,7 @@ fn delete_crate_from_database(conn: &mut Client, name: &str, crate_id: i32) -> R #[cfg(test)] mod tests { use super::*; - use crate::index::api::CrateOwner; + use crate::registry_api::CrateOwner; use crate::test::{assert_success, wrapper}; use postgres::Client; use test_case::test_case; diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs index 66e76154f..9a1e8eb35 100644 --- a/src/docbuilder/rustwide_builder.rs +++ b/src/docbuilder/rustwide_builder.rs @@ -13,7 +13,7 @@ use crate::utils::{ }; use crate::RUSTDOC_STATIC_STORAGE_PREFIX; use crate::{db::blacklist::is_blacklisted, utils::MetadataPackage}; -use crate::{Config, Context, Index, InstanceMetrics, Storage}; +use crate::{Config, Context, InstanceMetrics, RegistryApi, Storage}; use anyhow::{anyhow, bail, Context as _, Error}; use docsrs_metadata::{BuildTargets, Metadata, DEFAULT_TARGETS, HOST_TARGET}; use failure::Error as FailureError; @@ -87,7 +87,7 @@ pub struct RustwideBuilder { db: Pool, storage: Arc, metrics: Arc, - index: Arc, + registry_api: Arc, rustc_version: String, repository_stats_updater: Arc, workspace_initialize_time: Instant, @@ -105,7 +105,7 @@ impl RustwideBuilder { db: pool, storage: context.storage()?, metrics: context.instance_metrics()?, - index: context.index()?, + registry_api: context.registry_api()?, rustc_version: String::new(), repository_stats_updater: context.repository_stats_updater()?, workspace_initialize_time: Instant::now(), @@ -519,8 +519,7 @@ impl RustwideBuilder { let release_data = if !is_local { match self - .index - .api() + .registry_api .get_release_data(name, version) .with_context(|| { format!("could not fetch releases-data for {name}-{version}") @@ -565,7 +564,7 @@ impl RustwideBuilder { // Some crates.io crate data is mutable, so we proactively update it during a release if !is_local { - match self.index.api().get_crate_data(name) { + match self.registry_api.get_crate_data(name) { Ok(crate_data) => { update_crate_data_in_database(&mut conn, name, &crate_data)? } diff --git a/src/index/mod.rs b/src/index.rs similarity index 59% rename from src/index/mod.rs rename to src/index.rs index 352ead90d..9d4ecd537 100644 --- a/src/index/mod.rs +++ b/src/index.rs @@ -3,72 +3,40 @@ use std::{path::PathBuf, process::Command}; use anyhow::Context; use crates_index_diff::gix; -use url::Url; -use self::api::Api; use crate::error::Result; use crate::utils::report_error; -pub(crate) mod api; - pub struct Index { path: PathBuf, - api: Api, repository_url: Option, } -#[derive(Debug, serde::Deserialize, Clone)] -#[serde(rename_all = "kebab-case")] -struct IndexConfig { - #[serde(default)] - api: Option, -} - -/// Inspects the given repository to find the config as specified in [RFC 2141][], assumes that the -/// repository has a remote called `origin` and that the branch `master` exists on it. -/// -/// [RFC 2141]: https://rust-lang.github.io/rfcs/2141-alternative-registries.html -fn load_config(repo: &gix::Repository) -> Result { - let file = repo - .rev_parse_single("refs/remotes/origin/master:config.json") - .with_context(|| anyhow::anyhow!("registry index missing ./config.json in root"))? - .object()?; - - let config = serde_json::from_slice(&file.data)?; - Ok(config) -} - impl Index { - pub fn from_url(path: PathBuf, url: String, max_api_call_retries: u32) -> Result { - let diff = crates_index_diff::Index::from_path_or_cloned_with_options( + pub fn from_url(path: PathBuf, url: String) -> Result { + crates_index_diff::Index::from_path_or_cloned_with_options( &path, gix::progress::Discard, &AtomicBool::default(), crates_index_diff::index::CloneOptions { url: url.clone() }, ) + .map(|_| ()) .context("initialising registry index repository")?; - let config = load_config(diff.repository()).context("loading registry config")?; - let api = Api::new(config.api, max_api_call_retries) - .context("initialising registry api client")?; Ok(Self { path, - api, repository_url: Some(url), }) } - pub fn new(path: PathBuf, max_api_call_retries: u32) -> Result { + pub fn new(path: PathBuf) -> Result { // This initializes the repository, then closes it afterwards to avoid leaking file descriptors. // See https://github.com/rust-lang/docs.rs/pull/847 - let diff = crates_index_diff::Index::from_path_or_cloned(&path) + crates_index_diff::Index::from_path_or_cloned(&path) + .map(|_| ()) .context("initialising registry index repository")?; - let config = load_config(diff.repository()).context("loading registry config")?; - let api = Api::new(config.api, max_api_call_retries) - .context("initialising registry api client")?; Ok(Self { path, - api, repository_url: None, }) } @@ -102,10 +70,6 @@ impl Index { Ok(index) } - pub fn api(&self) -> &Api { - &self.api - } - pub fn run_git_gc(&self) { let gc = Command::new("git") .arg("-C") diff --git a/src/index/crates.rs b/src/index/crates.rs deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/lib.rs b/src/lib.rs index 518a6ad77..bbeb2a3a6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,7 @@ pub use self::docbuilder::PackageKind; pub use self::docbuilder::RustwideBuilder; pub use self::index::Index; pub use self::metrics::{InstanceMetrics, ServiceMetrics}; +pub use self::registry_api::RegistryApi; pub use self::storage::{AsyncStorage, Storage}; pub use self::web::{start_background_metrics_webserver, start_web_server}; @@ -21,6 +22,7 @@ mod docbuilder; mod error; pub mod index; pub mod metrics; +mod registry_api; pub mod repositories; pub mod storage; #[cfg(test)] diff --git a/src/index/api.rs b/src/registry_api.rs similarity index 92% rename from src/index/api.rs rename to src/registry_api.rs index 62e76f280..64f89a7c5 100644 --- a/src/index/api.rs +++ b/src/registry_api.rs @@ -13,8 +13,8 @@ const APP_USER_AGENT: &str = concat!( ); #[derive(Debug)] -pub struct Api { - api_base: Option, +pub struct RegistryApi { + api_base: Url, max_retries: u32, client: reqwest::blocking::Client, } @@ -47,8 +47,8 @@ pub struct CrateOwner { pub(crate) login: String, } -impl Api { - pub(super) fn new(api_base: Option, max_retries: u32) -> Result { +impl RegistryApi { + pub fn new(api_base: Url, max_retries: u32) -> Result { let headers = vec![ (USER_AGENT, HeaderValue::from_static(APP_USER_AGENT)), (ACCEPT, HeaderValue::from_static("application/json")), @@ -67,12 +67,6 @@ impl Api { }) } - fn api_base(&self) -> Result { - self.api_base - .clone() - .with_context(|| anyhow!("index is missing an api base url")) - } - pub fn get_crate_data(&self, name: &str) -> Result { let owners = self .get_owners(name) @@ -100,7 +94,7 @@ impl Api { version: &str, ) -> Result<(DateTime, bool, i32)> { let url = { - let mut url = self.api_base()?; + let mut url = self.api_base.clone(); url.path_segments_mut() .map_err(|()| anyhow!("Invalid API url"))? .extend(&["api", "v1", "crates", name, "versions"]); @@ -142,7 +136,7 @@ impl Api { /// Fetch owners from the registry's API fn get_owners(&self, name: &str) -> Result> { let url = { - let mut url = self.api_base()?; + let mut url = self.api_base.clone(); url.path_segments_mut() .map_err(|()| anyhow!("Invalid API url"))? .extend(&["api", "v1", "crates", name, "owners"]); diff --git a/src/test/fakes.rs b/src/test/fakes.rs index 299a2e8da..aa0e735a3 100644 --- a/src/test/fakes.rs +++ b/src/test/fakes.rs @@ -2,7 +2,7 @@ use super::TestDatabase; use crate::docbuilder::{BuildResult, DocCoverage}; use crate::error::Result; -use crate::index::api::{CrateData, CrateOwner, ReleaseData}; +use crate::registry_api::{CrateData, CrateOwner, ReleaseData}; use crate::storage::{rustdoc_archive_path, source_archive_path, Storage}; use crate::utils::{Dependency, MetadataPackage, Target}; use anyhow::Context; diff --git a/src/test/mod.rs b/src/test/mod.rs index 4e34fed1b..9ec8dca5e 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -7,7 +7,7 @@ use crate::error::Result; use crate::repositories::RepositoryStatsUpdater; use crate::storage::{AsyncStorage, Storage, StorageKind}; use crate::web::{build_axum_app, cache, page::TemplateData}; -use crate::{BuildQueue, Config, Context, Index, InstanceMetrics, ServiceMetrics}; +use crate::{BuildQueue, Config, Context, Index, InstanceMetrics, RegistryApi, ServiceMetrics}; use anyhow::Context as _; use fn_error_context::context; use once_cell::unsync::OnceCell; @@ -229,6 +229,7 @@ pub(crate) struct TestEnvironment { async_storage: OnceCell>, cdn: OnceCell>, index: OnceCell>, + registry_api: OnceCell>, runtime: OnceCell>, instance_metrics: OnceCell>, service_metrics: OnceCell>, @@ -263,6 +264,7 @@ impl TestEnvironment { async_storage: OnceCell::new(), cdn: OnceCell::new(), index: OnceCell::new(), + registry_api: OnceCell::new(), instance_metrics: OnceCell::new(), service_metrics: OnceCell::new(), frontend: OnceCell::new(), @@ -406,11 +408,22 @@ impl TestEnvironment { self.index .get_or_init(|| { Arc::new( - Index::new( - self.config().registry_index_path.clone(), + Index::new(self.config().registry_index_path.clone()) + .expect("failed to initialize the index"), + ) + }) + .clone() + } + + pub(crate) fn registry_api(&self) -> Arc { + self.registry_api + .get_or_init(|| { + Arc::new( + RegistryApi::new( + self.config().registry_api_host.clone(), self.config().crates_io_api_call_retries, ) - .expect("failed to initialize the index"), + .expect("failed to initialize the registry api"), ) }) .clone() @@ -489,6 +502,10 @@ impl Context for TestEnvironment { Ok(self.index()) } + fn registry_api(&self) -> Result> { + Ok(self.registry_api()) + } + fn repository_stats_updater(&self) -> Result> { Ok(self.repository_stats_updater()) } diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs index 280aaf46c..4c6f584ff 100644 --- a/src/web/crate_details.rs +++ b/src/web/crate_details.rs @@ -666,7 +666,7 @@ pub(crate) async fn get_all_platforms( #[cfg(test)] mod tests { use super::*; - use crate::index::api::CrateOwner; + use crate::registry_api::CrateOwner; use crate::test::{ assert_cache_control, assert_redirect, assert_redirect_cached, wrapper, TestDatabase, TestEnvironment, diff --git a/src/web/releases.rs b/src/web/releases.rs index 58a299919..acf6611d5 100644 --- a/src/web/releases.rs +++ b/src/web/releases.rs @@ -163,10 +163,9 @@ async fn get_search_results( .unwrap() }); - let url = url::Url::parse(&format!( - "{}/api/v1/crates{query_params}", - config.registry_api_host - ))?; + let url = config + .registry_api_host + .join(&format!("/api/v1/crates{query_params}"))?; debug!("fetching search results from {}", url); // extract the query from the query args. @@ -747,7 +746,7 @@ pub(crate) async fn build_queue_handler( #[cfg(test)] mod tests { use super::*; - use crate::index::api::CrateOwner; + use crate::registry_api::CrateOwner; use crate::test::{ assert_redirect, assert_redirect_unchecked, assert_success, wrapper, TestFrontend, }; @@ -899,7 +898,7 @@ mod tests { wrapper(|env| { let mut crates_io = mockito::Server::new(); env.override_config(|config| { - config.registry_api_host = crates_io.url(); + config.registry_api_host = crates_io.url().parse().unwrap(); }); let web = env.frontend(); @@ -985,7 +984,7 @@ mod tests { let mut crates_io = mockito::Server::new(); env.override_config(|config| { config.crates_io_api_call_retries = 0; - config.registry_api_host = crates_io.url(); + config.registry_api_host = crates_io.url().parse().unwrap(); }); let _m = crates_io @@ -1013,7 +1012,7 @@ mod tests { wrapper(|env| { let mut crates_io = mockito::Server::new(); env.override_config(|config| { - config.registry_api_host = crates_io.url(); + config.registry_api_host = crates_io.url().parse().unwrap(); }); let web = env.frontend(); @@ -1060,7 +1059,7 @@ mod tests { wrapper(|env| { let mut crates_io = mockito::Server::new(); env.override_config(|config| { - config.registry_api_host = crates_io.url(); + config.registry_api_host = crates_io.url().parse().unwrap(); }); let web = env.frontend(); @@ -1107,7 +1106,7 @@ mod tests { wrapper(|env| { let mut crates_io = mockito::Server::new(); env.override_config(|config| { - config.registry_api_host = crates_io.url(); + config.registry_api_host = crates_io.url().parse().unwrap(); }); let web = env.frontend();