diff --git a/Cargo.lock b/Cargo.lock index 930315088..aecccb3b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1518,8 +1518,7 @@ dependencies = [ [[package]] name = "ethers" version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a58ce802c65cf3d0756dee5a61094a92cde53c1583b246e9ee5b37226c7fc15" +source = "git+https://github.com/gakonst/ethers-rs#df165b84229cdc1c65e8522e0c1aeead3746d9a8" dependencies = [ "ethers-addressbook", "ethers-contract", @@ -1534,8 +1533,7 @@ dependencies = [ [[package]] name = "ethers-addressbook" version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b856b7b8ff5c961093cb8efe151fbcce724b451941ce20781de11a531ccd578" +source = "git+https://github.com/gakonst/ethers-rs#df165b84229cdc1c65e8522e0c1aeead3746d9a8" dependencies = [ "ethers-core", "once_cell", @@ -1546,13 +1544,13 @@ dependencies = [ [[package]] name = "ethers-contract" version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e066a0d9cfc70c454672bf16bb433b0243427420076dc5b2f49c448fb5a10628" +source = "git+https://github.com/gakonst/ethers-rs#df165b84229cdc1c65e8522e0c1aeead3746d9a8" dependencies = [ "ethers-contract-abigen", "ethers-contract-derive", "ethers-core", "ethers-providers", + "ethers-signers", "futures-util", "hex", "once_cell", @@ -1565,8 +1563,7 @@ dependencies = [ [[package]] name = "ethers-contract-abigen" version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c113e3e86b6bc16d98484b2c3bb2d01d6fed9f489fe2e592e5cc87c3024d616b" +source = "git+https://github.com/gakonst/ethers-rs#df165b84229cdc1c65e8522e0c1aeead3746d9a8" dependencies = [ "Inflector", "dunce", @@ -1589,8 +1586,7 @@ dependencies = [ [[package]] name = "ethers-contract-derive" version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c3fb5adee25701c79ec58fcf2c63594cd8829bc9ad6037ff862d5a111101ed2" +source = "git+https://github.com/gakonst/ethers-rs#df165b84229cdc1c65e8522e0c1aeead3746d9a8" dependencies = [ "Inflector", "ethers-contract-abigen", @@ -1605,8 +1601,7 @@ dependencies = [ [[package]] name = "ethers-core" version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6da5fa198af0d3be20c19192df2bd9590b92ce09a8421e793bec8851270f1b05" +source = "git+https://github.com/gakonst/ethers-rs#df165b84229cdc1c65e8522e0c1aeead3746d9a8" dependencies = [ "arrayvec", "bytes", @@ -1635,8 +1630,7 @@ dependencies = [ [[package]] name = "ethers-etherscan" version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84ebb401ba97c6f5af278c2c9936c4546cad75dec464b439ae6df249906f4caa" +source = "git+https://github.com/gakonst/ethers-rs#df165b84229cdc1c65e8522e0c1aeead3746d9a8" dependencies = [ "ethers-core", "reqwest", @@ -1650,8 +1644,7 @@ dependencies = [ [[package]] name = "ethers-middleware" version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "740f4a773c19dd6d6a68c8c2e0996c096488d38997d524e21dc612c55da3bd24" +source = "git+https://github.com/gakonst/ethers-rs#df165b84229cdc1c65e8522e0c1aeead3746d9a8" dependencies = [ "async-trait", "auto_impl", @@ -1677,8 +1670,7 @@ dependencies = [ [[package]] name = "ethers-providers" version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56b498fd2a6c019d023e43e83488cd1fb0721f299055975aa6bac8dbf1e95f2c" +source = "git+https://github.com/gakonst/ethers-rs#df165b84229cdc1c65e8522e0c1aeead3746d9a8" dependencies = [ "async-trait", "auto_impl", @@ -1713,8 +1705,7 @@ dependencies = [ [[package]] name = "ethers-signers" version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02c4b7e15f212fa7cc2e1251868320221d4ff77a3d48068e69f47ce1c491df2d" +source = "git+https://github.com/gakonst/ethers-rs#df165b84229cdc1c65e8522e0c1aeead3746d9a8" dependencies = [ "async-trait", "coins-bip32", @@ -1732,8 +1723,7 @@ dependencies = [ [[package]] name = "ethers-solc" version = "2.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a81c89f121595cf8959e746045bb8b25a6a38d72588561e1a3b7992fc213f674" +source = "git+https://github.com/gakonst/ethers-rs#df165b84229cdc1c65e8522e0c1aeead3746d9a8" dependencies = [ "cfg-if", "dunce", @@ -4332,24 +4322,24 @@ dependencies = [ [[package]] name = "strum" -version = "0.24.1" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" dependencies = [ "strum_macros", ] [[package]] name = "strum_macros" -version = "0.24.3" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +checksum = "fe9f3bd7d2e45dcc5e265fbb88d6513e4747d8ef9444cf01a533119bce28a157" dependencies = [ "heck", "proc-macro2 1.0.60", "quote 1.0.28", "rustversion", - "syn 1.0.104", + "syn 2.0.18", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index e58371105..b6774c08b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -63,7 +63,7 @@ axum-tungstenite = "0.2.0" rand = "0.8.4" prometheus-http-query = "0.6.6" -ethers = "2.0.7" +ethers = { version = "2.0.7", git = "https://github.com/gakonst/ethers-rs" } # using Git version because crates.io version fails clippy [dev-dependencies] diff --git a/src/error.rs b/src/error.rs index 970ed5232..e27c91a76 100644 --- a/src/error.rs +++ b/src/error.rs @@ -58,9 +58,6 @@ pub enum RpcError { #[error("Invalid address")] IdentityInvalidAddress, - #[error("Identity not found. Error: {0}")] - IdentityNotFound(String), - #[error("Ethers provider error: {0}")] EthersProviderError(ethers::providers::ProviderError), } @@ -127,14 +124,6 @@ impl IntoResponse for RpcError { )), ) .into_response(), - Self::IdentityNotFound(e) => ( - StatusCode::NOT_FOUND, - Json(new_error_response( - "address".to_string(), - format!("Could not find the ENS name: {e}"), - )), - ) - .into_response(), e => { error!("Internal server error: {}", e); ( diff --git a/src/handlers/identity.rs b/src/handlers/identity.rs index 786e154fb..79e5041e6 100644 --- a/src/handlers/identity.rs +++ b/src/handlers/identity.rs @@ -1,6 +1,6 @@ use { crate::{ - error::RpcError, + error::{new_error_response, RpcError}, extractors::method::Method, handlers::RpcQueryParams, json_rpc::{JsonRpcError, JsonRpcResponse}, @@ -9,24 +9,28 @@ use { async_trait::async_trait, axum::{ extract::{ConnectInfo, MatchedPath, Path, Query, State}, + response::{IntoResponse, Response}, Json, }, core::fmt, ethers::{ abi::Address, providers::{JsonRpcClient, Middleware, Provider, ProviderError}, + types::H160, }, hyper::{body::to_bytes, HeaderMap, Method as HyperMethod, StatusCode}, - serde::{de::DeserializeOwned, Serialize}, + serde::{de::DeserializeOwned, Deserialize, Serialize}, std::{ net::SocketAddr, sync::Arc, - time::{SystemTime, UNIX_EPOCH}, + time::{Duration, SystemTime, UNIX_EPOCH}, }, - tracing::debug, + tap::TapFallible, + tracing::{debug, warn}, }; -#[derive(Serialize)] +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Hash, Clone)] +#[serde(rename_all = "camelCase")] pub struct IdentityResponse { name: String, avatar: Option, @@ -39,47 +43,162 @@ pub async fn handler( path: MatchedPath, headers: HeaderMap, Path(address): Path, -) -> Result, RpcError> { +) -> Result { + let start = SystemTime::now(); + state.metrics.add_identity_lookup(); + + let address = address + .parse::
() + .map_err(|_| RpcError::IdentityInvalidAddress)?; + + let (source, res) = + lookup_identity(address, state.clone(), connect_info, query, path, headers).await?; + + state.metrics.add_identity_lookup_latency(start, &source); + state.metrics.add_identity_lookup_success(&source); + + if let Some(IdentityResponse { avatar, .. }) = &res { + state.metrics.add_identity_lookup_name_present(); + if avatar.is_some() { + state.metrics.add_identity_lookup_avatar_present(); + } + } + + Ok(if let Some(res) = res { + Json(res).into_response() + } else { + ( + StatusCode::NOT_FOUND, + Json(new_error_response( + "address".to_string(), + "Could not find the address".to_owned(), + )), + ) + .into_response() + }) +} + +pub enum IdentityLookupSource { + Cache, + Rpc, +} + +impl IdentityLookupSource { + pub fn as_str(&self) -> &'static str { + match self { + Self::Cache => "cache", + Self::Rpc => "rpc", + } + } +} + +async fn lookup_identity( + address: H160, + state: State>, + connect_info: ConnectInfo, + query: Query, + path: MatchedPath, + headers: HeaderMap, +) -> Result<(IdentityLookupSource, Option), RpcError> { + let cache_key = format!("{}", address); + if let Some(cache) = &state.identity_cache { + debug!("Checking cache for identity"); + let cache_start = SystemTime::now(); + let value = cache.get(&cache_key).await?; + state.metrics.add_identity_lookup_cache_latency(cache_start); + if let Some(response) = value { + return Ok((IdentityLookupSource::Cache, response)); + } + } + + let res = + lookup_identity_rpc(address, state.clone(), connect_info, query, path, headers).await?; + + if let Some(cache) = &state.identity_cache { + debug!("Saving to cache"); + let cache = cache.clone(); + let res = res.clone(); + let cache_ttl = Duration::from_secs(60 * 60 * 24); + // Do not block on cache write. + tokio::spawn(async move { + cache + .set(&cache_key, &res, Some(cache_ttl)) + .await + .tap_err(|err| { + warn!("failed to cache identity lookup (cache_key:{cache_key}): {err:?}") + }) + .ok(); + debug!("Setting cache success"); + }); + } + + Ok((IdentityLookupSource::Rpc, res)) +} + +async fn lookup_identity_rpc( + address: H160, + state: State>, + connect_info: ConnectInfo, + query: Query, + path: MatchedPath, + headers: HeaderMap, +) -> Result, RpcError> { let provider = Provider::new(SelfProvider { - state, + state: state.clone(), connect_info, query, path, headers, }); - let address = address - .parse::
() - .map_err(|_| RpcError::IdentityInvalidAddress)?; - + debug!("Beginning name lookup"); + state.metrics.add_identity_lookup_name(); + let name_lookup_start = SystemTime::now(); let name = provider .lookup_address(address) .await - .map_err(|e| match e { - ProviderError::EnsError(e) | ProviderError::EnsNotOwned(e) => { - RpcError::IdentityNotFound(e) - } - e => RpcError::EthersProviderError(e), - })?; - - let avatar = provider.resolve_avatar(&name).await.map_or_else( - |e| match e { - ProviderError::EnsError(_) | ProviderError::EnsNotOwned(_) => Ok(None), - ProviderError::CustomError(e) if e.starts_with("relative URL without a base") => { - // Seems not having an `avatar` field returns this error - Ok(None) - } - e => Err(RpcError::EthersProviderError(e)), - }, - |url| Ok(Some(url)), - )?; - - let res = IdentityResponse { - name, - avatar: avatar.map(|url| url.to_string()), + .tap_err(|err| debug!("Error while looking up name: {err:?}")) + .map_or_else( + |e| match e { + ProviderError::EnsError(_) | ProviderError::EnsNotOwned(_) => Ok(None), + e => Err(RpcError::EthersProviderError(e)), + }, + |name| Ok(Some(name)), + )?; + state + .metrics + .add_identity_lookup_name_latency(name_lookup_start); + state.metrics.add_identity_lookup_name_success(); + let name = match name { + Some(name) => name, + None => return Ok(None), }; - Ok(Json(res)) + debug!("Beginning avatar lookup"); + state.metrics.add_identity_lookup_avatar(); + let avatar_lookup_start = SystemTime::now(); + let avatar = provider + .resolve_avatar(&name) + .await + .tap_err(|err| debug!("Error while looking up avatar: {err:?}")) + .map_or_else( + |e| match e { + ProviderError::EnsError(_) | ProviderError::EnsNotOwned(_) => Ok(None), + ProviderError::CustomError(e) if e.starts_with("relative URL without a base") => { + // Seems not having an `avatar` field returns this error + Ok(None) + } + e => Err(RpcError::EthersProviderError(e)), + }, + |url| Ok(Some(url)), + )? + .map(|url| url.to_string()); + state + .metrics + .add_identity_lookup_avatar_latency(avatar_lookup_start); + state.metrics.add_identity_lookup_avatar_success(); + + Ok(Some(IdentityResponse { name, avatar })) } struct SelfProvider { @@ -155,17 +274,14 @@ impl JsonRpcClient for SelfProvider { let id = SystemTime::now() .duration_since(UNIX_EPOCH) - .expect("can get current time") + .expect("Time should't go backwards") .as_millis() .to_string(); let response = super::proxy::handler( self.state.clone(), self.connect_info, - Query(RpcQueryParams { - chain_id: self.query.chain_id.clone(), - project_id: self.query.project_id.clone(), - }), + self.query.clone(), Method(HyperMethod::POST), self.path.clone(), self.headers.clone(), @@ -195,10 +311,12 @@ impl JsonRpcClient for SelfProvider { let response = serde_json::from_slice::(&bytes) .map_err(SelfProviderError::ProviderBodySerde)?; - match response { + let result = match response { JsonRpcResponse::Error(e) => return Err(SelfProviderError::JsonRpcError(e)), - JsonRpcResponse::Result(r) => Ok(serde_json::from_value(r.result) - .expect("Caller should provide generic parameter of type Bytes")), - } + JsonRpcResponse::Result(r) => r.result, + }; + let result = serde_json::from_value(result) + .expect("Caller always provides generic parameter R=Bytes"); + Ok(result) } } diff --git a/src/handlers/mod.rs b/src/handlers/mod.rs index 0ef5c509a..1b2025925 100644 --- a/src/handlers/mod.rs +++ b/src/handlers/mod.rs @@ -6,7 +6,7 @@ pub mod metrics; pub mod proxy; pub mod ws_proxy; -#[derive(Deserialize)] +#[derive(Deserialize, Clone)] #[serde(rename_all = "camelCase")] pub struct RpcQueryParams { pub chain_id: String, diff --git a/src/handlers/proxy.rs b/src/handlers/proxy.rs index 91ff05fa5..fa08b1733 100644 --- a/src/handlers/proxy.rs +++ b/src/handlers/proxy.rs @@ -62,7 +62,7 @@ pub async fn handler( .get_provider_for_chain_id(&chain_id) .ok_or(RpcError::UnsupportedChain(chain_id.clone()))?; - state.metrics.add_rpc_call(&chain_id); + state.metrics.add_rpc_call(chain_id.clone()); let origin = headers .get("origin") @@ -101,7 +101,7 @@ pub async fn handler( state .metrics - .add_status_code_for_provider(provider.borrow(), response.status(), &chain_id); + .add_status_code_for_provider(provider.borrow(), response.status(), chain_id); if provider.is_rate_limited(&mut response).await { state diff --git a/src/lib.rs b/src/lib.rs index 4ffbd46f2..9fc7ebc99 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,14 @@ use { - crate::{env::Config, metrics::Metrics, project::Registry}, + crate::{ + env::Config, + handlers::identity::IdentityResponse, + metrics::Metrics, + project::Registry, + storage::{ + redis::{self}, + KeyValueStorage, + }, + }, anyhow::Context, axum::{ http, @@ -64,12 +73,19 @@ pub async fn bootstrap(config: Config) -> RpcResult<()> { let metrics = Arc::new(Metrics::new(&meter)); let registry = Registry::new(&config.registry, &config.storage, &meter)?; + // TODO refactor encapsulate these details in a lower layer + let identity_cache = config + .storage + .project_data_redis_addr() + .map(|addr| redis::Redis::new(&addr, config.storage.redis_max_connections)) + .transpose()? + .map(|r| Arc::new(r) as Arc> + 'static>); let providers = init_providers(); let external_ip = config .server .external_ip() - .unwrap_or_else(|_| IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); + .unwrap_or(IpAddr::V4(Ipv4Addr::LOCALHOST)); let analytics = analytics::RPCAnalytics::new(&config.analytics, external_ip) .await @@ -81,6 +97,7 @@ pub async fn bootstrap(config: Config) -> RpcResult<()> { prometheus_exporter, metrics.clone(), registry, + identity_cache, analytics, ); @@ -121,11 +138,11 @@ pub async fn bootstrap(config: Config) -> RpcResult<()> { move |response: &Response, latency: Duration, _span: &Span| { proxy_state .metrics - .add_http_call(response.status().into(), "proxy"); + .add_http_call(response.status().into(), "proxy".to_owned()); proxy_state.metrics.add_http_latency( response.status().into(), - "proxy", + "proxy".to_owned(), latency.as_secs_f64(), ) }, diff --git a/src/metrics.rs b/src/metrics.rs index 696faf340..b0f393a08 100644 --- a/src/metrics.rs +++ b/src/metrics.rs @@ -1,7 +1,11 @@ use { - crate::providers::{ProviderKind, RpcProvider}, + crate::{ + handlers::identity::IdentityLookupSource, + providers::{ProviderKind, RpcProvider}, + }, hyper::http, opentelemetry::metrics::{Counter, Meter, ValueRecorder}, + std::time::{Duration, SystemTime}, }; #[derive(Debug)] @@ -16,6 +20,18 @@ pub struct Metrics { pub rate_limited_call_counter: Counter, pub provider_status_code_counter: Counter, pub weights_value_recorder: ValueRecorder, + pub identity_lookup_latency_tracker: ValueRecorder, + pub identity_lookup_counter: Counter, + pub identity_lookup_success_counter: Counter, + pub identity_lookup_cache_latency_tracker: ValueRecorder, + pub identity_lookup_name_counter: Counter, + pub identity_lookup_name_success_counter: Counter, + pub identity_lookup_name_latency_tracker: ValueRecorder, + pub identity_lookup_avatar_counter: Counter, + pub identity_lookup_avatar_success_counter: Counter, + pub identity_lookup_avatar_latency_tracker: ValueRecorder, + pub identity_lookup_avatar_present_counter: Counter, + pub identity_lookup_name_present_counter: Counter, } impl Metrics { @@ -70,6 +86,66 @@ impl Metrics { .with_description("The weights of the providers") .init(); + let identity_lookup_counter = meter + .u64_counter("identity_lookup_counter") + .with_description("The number of identity lookups served") + .init(); + + let identity_lookup_success_counter = meter + .u64_counter("identity_lookup_success_counter") + .with_description("The number of identity lookups that were successful") + .init(); + + let identity_lookup_latency_tracker = meter + .f64_value_recorder("identity_lookup_latency_tracker") + .with_description("The latency to serve identity lookups") + .init(); + + let identity_lookup_cache_latency_tracker = meter + .f64_value_recorder("identity_lookup_cache_latency_tracker") + .with_description("The latency to lookup identity in the cache") + .init(); + + let identity_lookup_name_counter = meter + .u64_counter("identity_lookup_name_counter") + .with_description("The number of name lookups") + .init(); + + let identity_lookup_name_success_counter = meter + .u64_counter("identity_lookup_name_success_counter") + .with_description("The number of name lookups that were successfull") + .init(); + + let identity_lookup_name_latency_tracker = meter + .f64_value_recorder("identity_lookup_name_latency_tracker") + .with_description("The latency of performing the name lookup") + .init(); + + let identity_lookup_avatar_counter = meter + .u64_counter("identity_lookup_avatar_counter") + .with_description("The number of avatar lookups") + .init(); + + let identity_lookup_avatar_success_counter = meter + .u64_counter("identity_lookup_avatar_success_counter") + .with_description("The number of avatar lookups that were successfull") + .init(); + + let identity_lookup_avatar_latency_tracker = meter + .f64_value_recorder("identity_lookup_avatar_latency_tracker") + .with_description("The latency of performing the avatar lookup") + .init(); + + let identity_lookup_name_present_counter = meter + .u64_counter("identity_lookup_name_present_counter") + .with_description("The number of identity lookups that returned a name") + .init(); + + let identity_lookup_avatar_present_counter = meter + .u64_counter("identity_lookup_avatar_present_counter") + .with_description("The number of identity lookups that returned an avatar") + .init(); + Metrics { rpc_call_counter, http_call_counter, @@ -81,29 +157,39 @@ impl Metrics { provider_finished_call_counter, provider_status_code_counter, weights_value_recorder, + identity_lookup_counter, + identity_lookup_success_counter, + identity_lookup_latency_tracker, + identity_lookup_cache_latency_tracker, + identity_lookup_name_counter, + identity_lookup_name_success_counter, + identity_lookup_name_latency_tracker, + identity_lookup_avatar_counter, + identity_lookup_avatar_success_counter, + identity_lookup_avatar_latency_tracker, + identity_lookup_name_present_counter, + identity_lookup_avatar_present_counter, } } } impl Metrics { - pub fn add_rpc_call(&self, chain_id: &str) { - self.rpc_call_counter.add(1, &[opentelemetry::KeyValue::new( - "chain.id", - chain_id.to_owned(), - )]); + pub fn add_rpc_call(&self, chain_id: String) { + self.rpc_call_counter + .add(1, &[opentelemetry::KeyValue::new("chain.id", chain_id)]); } - pub fn add_http_call(&self, code: u16, route: &str) { + pub fn add_http_call(&self, code: u16, route: String) { self.http_call_counter.add(1, &[ opentelemetry::KeyValue::new("code", i64::from(code)), - opentelemetry::KeyValue::new("route", route.to_owned()), + opentelemetry::KeyValue::new("route", route), ]); } - pub fn add_http_latency(&self, code: u16, route: &str, latency: f64) { + pub fn add_http_latency(&self, code: u16, route: String, latency: f64) { self.http_latency_tracker.record(latency, &[ opentelemetry::KeyValue::new("code", i64::from(code)), - opentelemetry::KeyValue::new("route", route.to_owned()), + opentelemetry::KeyValue::new("route", route), ]) } @@ -146,19 +232,95 @@ impl Metrics { &self, provider: &dyn RpcProvider, status: http::StatusCode, - chain_id: &str, + chain_id: String, ) { self.provider_status_code_counter.add(1, &[ opentelemetry::KeyValue::new("provider", provider.provider_kind().to_string()), opentelemetry::KeyValue::new("status_code", format!("{}", status.as_u16())), - opentelemetry::KeyValue::new("chain_id", chain_id.to_owned()), + opentelemetry::KeyValue::new("chain_id", chain_id), ]) } - pub fn record_provider_weight(&self, provider: &ProviderKind, chain_id: &str, weight: u64) { + pub fn record_provider_weight(&self, provider: &ProviderKind, chain_id: String, weight: u64) { self.weights_value_recorder.record(weight, &[ opentelemetry::KeyValue::new("provider", provider.to_string()), - opentelemetry::KeyValue::new("chain_id", chain_id.to_string()), + opentelemetry::KeyValue::new("chain_id", chain_id), ]) } + + pub fn add_identity_lookup(&self) { + self.identity_lookup_counter.add(1, &[]); + } + + pub fn add_identity_lookup_success(&self, source: &IdentityLookupSource) { + self.identity_lookup_success_counter + .add(1, &[opentelemetry::KeyValue::new( + "source", + source.as_str(), + )]); + } + + pub fn add_identity_lookup_latency(&self, start: SystemTime, source: &IdentityLookupSource) { + self.identity_lookup_latency_tracker.record( + start + .elapsed() + .unwrap_or(Duration::from_secs(0)) + .as_secs_f64(), + &[opentelemetry::KeyValue::new("source", source.as_str())], + ); + } + + pub fn add_identity_lookup_cache_latency(&self, start: SystemTime) { + self.identity_lookup_cache_latency_tracker.record( + start + .elapsed() + .unwrap_or(Duration::from_secs(0)) + .as_secs_f64(), + &[], + ); + } + + pub fn add_identity_lookup_name(&self) { + self.identity_lookup_name_counter.add(1, &[]); + } + + pub fn add_identity_lookup_name_success(&self) { + self.identity_lookup_name_success_counter.add(1, &[]); + } + + pub fn add_identity_lookup_name_latency(&self, start: SystemTime) { + self.identity_lookup_name_latency_tracker.record( + start + .elapsed() + .unwrap_or(Duration::from_secs(0)) + .as_secs_f64(), + &[], + ); + } + + pub fn add_identity_lookup_avatar(&self) { + self.identity_lookup_avatar_counter.add(1, &[]); + } + + pub fn add_identity_lookup_avatar_success(&self) { + self.identity_lookup_avatar_success_counter.add(1, &[]); + } + + pub fn add_identity_lookup_avatar_latency(&self, start: SystemTime) { + self.identity_lookup_avatar_latency_tracker.record( + start + .elapsed() + .unwrap_or(Duration::from_secs(0)) + .as_secs_f64(), + &[], + ); + } + + pub fn add_identity_lookup_name_present(&self) { + self.identity_lookup_name_present_counter.add(1, &[]); + } + + pub fn add_identity_lookup_avatar_present(&self) { + self.identity_lookup_avatar_present_counter.add(1, &[]); + } } diff --git a/src/providers/weights.rs b/src/providers/weights.rs index 38004540f..b435fd092 100644 --- a/src/providers/weights.rs +++ b/src/providers/weights.rs @@ -127,7 +127,7 @@ pub fn record_values(weight_resolver: &WeightResolver, metrics: &crate::Metrics) for (chain_id, provider_chain_weight) in weight_resolver { for (provider_kind, weight) in provider_chain_weight { let weight = weight.value(); - metrics.record_provider_weight(provider_kind, chain_id, weight.into()) + metrics.record_provider_weight(provider_kind, chain_id.to_owned(), weight.into()) } } } diff --git a/src/state.rs b/src/state.rs index e0994002c..72d73aefa 100644 --- a/src/state.rs +++ b/src/state.rs @@ -2,9 +2,11 @@ use { crate::{ analytics::RPCAnalytics, env::Config, + handlers::identity::IdentityResponse, metrics::Metrics, project::Registry, providers::ProviderRepository, + storage::KeyValueStorage, utils::build::CompileInfo, }, opentelemetry_prometheus::PrometheusExporter, @@ -17,6 +19,7 @@ pub struct AppState { pub exporter: PrometheusExporter, pub metrics: Arc, pub registry: Registry, + pub identity_cache: Option>>>, pub analytics: RPCAnalytics, pub compile_info: CompileInfo, } @@ -27,6 +30,7 @@ pub fn new_state( exporter: PrometheusExporter, metrics: Arc, registry: Registry, + identity_cache: Option>>>, analytics: RPCAnalytics, ) -> AppState { AppState { @@ -35,6 +39,7 @@ pub fn new_state( exporter, metrics, registry, + identity_cache, analytics, compile_info: CompileInfo {}, } diff --git a/terraform/README.md b/terraform/README.md index 9b2cdf3a8..cd5daad6b 100644 --- a/terraform/README.md +++ b/terraform/README.md @@ -11,3 +11,31 @@ Use the dev workspace: Now you can apply the changes: `terraform -chdir=terraform apply -var-file="vars/$(terraform -chdir=terraform workspace show).tfvars"` + +## Deploying local code changes + +```bash +nano .env # set AWS access keys and GRAFANA_AUTH +source .env +./terraform/deploy-dev.sh +``` + +### macOS considerations + +If you get this error: + +``` +assertion failed [find_leftmost_allocation_holding_lock(interval) == nullptr]: interval being added overlaps existing allocation +(VMAllocationTracker.cpp:322 add) +``` + +Try disabling "Use Rosetta for x86/amd64 emulation on Apple Silicon" in Docker Desktop settings. + +#### Remote building + +If amd64 builds are too slow on your Mac (likely), consider using a remote builder on a linux/amd64 host: + +```bash +docker buildx create --name=remote-amd64 --driver=docker-container ssh:// +BUILD_ARGS="--builder=remote-amd64 --load" ./terraform/deploy-dev.sh +``` diff --git a/terraform/analytics.tf b/terraform/analytics.tf index 85bb9aab3..91d2a25d4 100644 --- a/terraform/analytics.tf +++ b/terraform/analytics.tf @@ -15,7 +15,17 @@ resource "aws_s3_bucket" "analytics-data-lake_bucket" { bucket = "walletconnect.${local.app_name}.${terraform.workspace}.analytics.data-lake" } +# https://github.com/hashicorp/terraform-provider-aws/issues/28353 +resource "aws_s3_bucket_ownership_controls" "analytics-data-lake_controls" { + bucket = aws_s3_bucket.analytics-data-lake_bucket.id + rule { + object_ownership = "BucketOwnerPreferred" + } +} + resource "aws_s3_bucket_acl" "analytics-data-lake_acl" { + depends_on = [aws_s3_bucket_ownership_controls.analytics-data-lake_controls] + bucket = aws_s3_bucket.analytics-data-lake_bucket.id acl = "private" } diff --git a/terraform/deploy-dev.sh b/terraform/deploy-dev.sh new file mode 100755 index 000000000..596f29134 --- /dev/null +++ b/terraform/deploy-dev.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +set -e + +TERRAFORM_DIR="$(dirname "$0")" + +accountId="$(aws sts get-caller-identity | jq -r .Account)" +region="$(cat $TERRAFORM_DIR/variables.tf | grep -A 2 region | grep default | sed -nr 's/.+default = "(.+)"/\1/p')" + +imageRepo="$accountId.dkr.ecr.$region.amazonaws.com/rpc-proxy" + +aws ecr get-login-password --region eu-central-1 | docker login --username AWS --password-stdin "$imageRepo" +# --platform=linux/amd64: Must target linux/amd64 as that is what ECS runs. +docker build "$TERRAFORM_DIR/.." -t "$imageRepo" --build-arg=release=true --platform=linux/amd64 $BUILD_ARGS +sha="$(docker inspect --format="{{ .Id }}" "$imageRepo" | cut -d: -f2)" +tag="$imageRepo:$sha" +docker tag "$imageRepo" "$tag" +docker push "$tag" + +terraform -chdir="$TERRAFORM_DIR" workspace select dev +TF_VAR_ecr_app_version="$sha" terraform -chdir="$TERRAFORM_DIR" apply -var-file="vars/$(terraform -chdir="$TERRAFORM_DIR" workspace show).tfvars" -auto-approve diff --git a/terraform/ecs/main.tf b/terraform/ecs/main.tf index 928c6850e..aaa635fa4 100644 --- a/terraform/ecs/main.tf +++ b/terraform/ecs/main.tf @@ -17,7 +17,7 @@ locals { // TODO: version the RPC image so we can pin it # pinned_latest_tag = sort(setsubtract(data.aws_ecr_image.service_image.image_tags, ["latest"]))[0] // TODO: allow caller to pin version - image_tag = data.aws_ecr_image.service_image.image_tags[0] # TODO: var.ecr_app_version == "latest" ? local.pinned_latest_tag : var.ecr_app_version + image_tag = var.ecr_app_version == "latest" ? data.aws_ecr_image.service_image.image_tags[0] : var.ecr_app_version # TODO: var.ecr_app_version == "latest" ? local.pinned_latest_tag : var.ecr_app_version image = "${var.ecr_repository_url}:${local.image_tag}" file_descriptor_soft_limit = pow(2, 18) diff --git a/terraform/ecs/variables.tf b/terraform/ecs/variables.tf index dad84a9ec..39ea649ce 100644 --- a/terraform/ecs/variables.tf +++ b/terraform/ecs/variables.tf @@ -2,6 +2,11 @@ variable "ecr_repository_url" { type = string } +variable "ecr_app_version" { + description = "The tag of the app image to deploy." + type = string +} + variable "app_name" { type = string } diff --git a/terraform/logging/main.tf b/terraform/logging/main.tf index ef055834a..e2893bfcf 100644 --- a/terraform/logging/main.tf +++ b/terraform/logging/main.tf @@ -3,7 +3,17 @@ resource "aws_s3_bucket" "logging_bucket" { bucket = "walletconnect.${var.app_name}.${terraform.workspace}.access-logs" } +# https://github.com/hashicorp/terraform-provider-aws/issues/28353 +resource "aws_s3_bucket_ownership_controls" "logging_bucket-controls" { + bucket = aws_s3_bucket.logging_bucket.id + rule { + object_ownership = "BucketOwnerPreferred" + } +} + resource "aws_s3_bucket_acl" "logging_bucket-acl" { + depends_on = [aws_s3_bucket_ownership_controls.logging_bucket-controls] + bucket = aws_s3_bucket.logging_bucket.id acl = "log-delivery-write" } diff --git a/terraform/main.tf b/terraform/main.tf index 7d1b2df0b..1a362cf4c 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -6,7 +6,8 @@ locals { fqdn = terraform.workspace == "prod" ? local.hosted_zone_name : "${terraform.workspace}.${local.hosted_zone_name}" backup_fqdn = terraform.workspace == "prod" ? local.backup_hosted_zone_name : "${terraform.workspace}.${local.backup_hosted_zone_name}" - analytics_geoip_db_bucket_name = "${terraform.workspace}.relay.geo.ip.database.private.${terraform.workspace}.walletconnect" + analytics_geoip_db_bucket_env = terraform.workspace == "dev" ? "staging" : terraform.workspace + analytics_geoip_db_bucket_name = "${local.analytics_geoip_db_bucket_env}.relay.geo.ip.database.private.${local.analytics_geoip_db_bucket_env}.walletconnect" } # tflint-ignore: terraform_unused_declarations @@ -55,6 +56,7 @@ module "ecs" { environment = terraform.workspace ecr_repository_url = data.aws_ecr_repository.repository.repository_url + ecr_app_version = var.ecr_app_version app_name = "${terraform.workspace}_${local.app_name}" region = var.region vpc_name = "ops-${terraform.workspace}-vpc" diff --git a/terraform/monitoring/dashboard.jsonnet b/terraform/monitoring/dashboard.jsonnet index 678fab605..713860767 100644 --- a/terraform/monitoring/dashboard.jsonnet +++ b/terraform/monitoring/dashboard.jsonnet @@ -30,6 +30,7 @@ local vars = { local height = 8; local pos = grafana.layout.pos(height); +local pos_short = grafana.layout.pos(6); //////////////////////////////////////////////////////////////////////////////// @@ -83,4 +84,11 @@ dashboard.new( row.new('Database'), panels.db.redis_cpu_memory(ds, vars) { gridPos: pos._2 }, + + row.new('Identity (ENS) Metrics'), + panels.identity.requests(ds, vars) { gridPos: pos_short._2 }, + panels.identity.availability(ds, vars) { gridPos: pos_short._2 }, + panels.identity.latency(ds, vars) { gridPos: pos_short._2 }, + panels.identity.cache(ds, vars) { gridPos: pos_short._2 }, + panels.identity.usage(ds, vars) { gridPos: pos_short._2 }, ])) diff --git a/terraform/monitoring/panels/ecs/availability.libsonnet b/terraform/monitoring/panels/ecs/availability.libsonnet index 82c016a85..bf70c8982 100644 --- a/terraform/monitoring/panels/ecs/availability.libsonnet +++ b/terraform/monitoring/panels/ecs/availability.libsonnet @@ -10,7 +10,14 @@ local targets = grafana.targets; title = 'Availability', datasource = ds.prometheus, ) - .configure(defaults.configuration.timeseries.withUnit('percent')) + .configure( + defaults.configuration.timeseries + .withUnit('percent') + .withSoftLimit( + axisSoftMin = 98, + axisSoftMax = 100, + ) + ) .addTarget(targets.prometheus( datasource = ds.prometheus, diff --git a/terraform/monitoring/panels/identity/availability.libsonnet b/terraform/monitoring/panels/identity/availability.libsonnet new file mode 100644 index 000000000..d496166e6 --- /dev/null +++ b/terraform/monitoring/panels/identity/availability.libsonnet @@ -0,0 +1,73 @@ +local grafana = import '../../grafonnet-lib/grafana.libsonnet'; +local defaults = import '../../grafonnet-lib/defaults.libsonnet'; + +local panels = grafana.panels; +local targets = grafana.targets; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Availability', + datasource = ds.prometheus, + ) + .configure( + defaults.configuration.timeseries + .withUnit('percent') + .withSoftLimit( + axisSoftMin = 98, + axisSoftMax = 100, + ) + ) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_counter{}[$__rate_interval]))', + refId = "lookup", + hide = true, + )) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_success_counter{}[$__rate_interval]))', + refId = "lookup_success", + hide = true, + )) + .addTarget(targets.math( + expr = '($lookup_success / $lookup) * 100', + refId = "Availability", + )) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_name_counter{}[$__rate_interval]))', + refId = "lookup_name", + hide = true, + )) + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_name_success_counter{}[$__rate_interval]))', + refId = "lookup_name_success", + hide = true, + )) + .addTarget(targets.math( + expr = '($lookup_name_success / $lookup_name) * 100', + refId = "Name availability", + )) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_avatar_counter{}[$__rate_interval]))', + refId = "lookup_avatar", + hide = true, + )) + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_avatar_success_counter{}[$__rate_interval]))', + refId = "lookup_avatar_success", + hide = true, + )) + .addTarget(targets.math( + expr = '($lookup_avatar_success / $lookup_avatar) * 100', + refId = "Avatar availability", + )) +} diff --git a/terraform/monitoring/panels/identity/cache.libsonnet b/terraform/monitoring/panels/identity/cache.libsonnet new file mode 100644 index 000000000..05a5db1ad --- /dev/null +++ b/terraform/monitoring/panels/identity/cache.libsonnet @@ -0,0 +1,41 @@ +local grafana = import '../../grafonnet-lib/grafana.libsonnet'; +local defaults = import '../../grafonnet-lib/defaults.libsonnet'; + +local panels = grafana.panels; +local targets = grafana.targets; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Cache-hit ratio', + datasource = ds.prometheus, + ) + .configure( + defaults.configuration.timeseries + .withUnit('percent') + .withSoftLimit( + axisSoftMin = 0, + axisSoftMax = 100, + ) + ) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_success_counter{}[$__rate_interval]))', + refId = "lookups", + exemplar = false, + hide = true, + )) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_success_counter{source="cache"}[$__rate_interval]))', + refId = "cache_hits", + exemplar = false, + hide = true, + )) + .addTarget(targets.math( + expr = '($cache_hits / $lookups) * 100', + refId = "Cache-hits", + )) +} diff --git a/terraform/monitoring/panels/identity/latency.libsonnet b/terraform/monitoring/panels/identity/latency.libsonnet new file mode 100644 index 000000000..01b36c574 --- /dev/null +++ b/terraform/monitoring/panels/identity/latency.libsonnet @@ -0,0 +1,45 @@ +local grafana = import '../../grafonnet-lib/grafana.libsonnet'; +local defaults = import '../../grafonnet-lib/defaults.libsonnet'; + +local panels = grafana.panels; +local targets = grafana.targets; + +local _configuration = defaults.configuration.timeseries + .withUnit('ms') + .withSoftLimit( + axisSoftMin = 0.4, + axisSoftMax = 1.1, + ); + +{ + new(ds, vars):: + panels.timeseries( + title = 'Latency', + datasource = ds.prometheus, + ) + .configure(_configuration) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_latency{}[$__rate_interval]))', + refId = "Latency", + )) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_cache_latency_tracker{}[$__rate_interval]))', + refId = "Cache latency", + )) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_name_latency_tracker{}[$__rate_interval]))', + refId = "Name RPC latency", + )) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_avatar_latency_tracker{}[$__rate_interval]))', + refId = "Avatar RPC latency", + )) +} diff --git a/terraform/monitoring/panels/identity/requests.libsonnet b/terraform/monitoring/panels/identity/requests.libsonnet new file mode 100644 index 000000000..9b5f9fc91 --- /dev/null +++ b/terraform/monitoring/panels/identity/requests.libsonnet @@ -0,0 +1,20 @@ +local grafana = import '../../grafonnet-lib/grafana.libsonnet'; +local defaults = import '../../grafonnet-lib/defaults.libsonnet'; + +local panels = grafana.panels; +local targets = grafana.targets; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Requests', + datasource = ds.prometheus, + ) + .configure(defaults.configuration.timeseries.withUnit('reqps')) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_counter{}[$__rate_interval]))', + refId = "Requests", + )) +} diff --git a/terraform/monitoring/panels/identity/usage.libsonnet b/terraform/monitoring/panels/identity/usage.libsonnet new file mode 100644 index 000000000..665d2aac7 --- /dev/null +++ b/terraform/monitoring/panels/identity/usage.libsonnet @@ -0,0 +1,47 @@ +local grafana = import '../../grafonnet-lib/grafana.libsonnet'; +local defaults = import '../../grafonnet-lib/defaults.libsonnet'; + +local panels = grafana.panels; +local targets = grafana.targets; + +{ + new(ds, vars):: + panels.timeseries( + title = 'Avatar usage', + datasource = ds.prometheus, + ) + .configure(defaults.configuration.timeseries.withUnit('percent')) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_success_counter{}[$__rate_interval]))', + refId = "lookups", + exemplar = false, + hide = true, + )) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_name_present_counter{}[$__rate_interval]))', + refId = "name_present", + exemplar = false, + hide = true, + )) + + .addTarget(targets.prometheus( + datasource = ds.prometheus, + expr = 'sum(rate(identity_lookup_avatar_present_counter{}[$__rate_interval]))', + refId = "avatar_present", + exemplar = false, + hide = true, + )) + + .addTarget(targets.math( + expr = '($name_present / $lookups) * 100', + refId = "% of lookups with name", + )) + .addTarget(targets.math( + expr = '($avatar_present / $lookups) * 100', + refId = "% of lookups with avatar", + )) +} diff --git a/terraform/monitoring/panels/panels.libsonnet b/terraform/monitoring/panels/panels.libsonnet index 80e58e678..cef8b2217 100644 --- a/terraform/monitoring/panels/panels.libsonnet +++ b/terraform/monitoring/panels/panels.libsonnet @@ -26,4 +26,12 @@ db: { redis_cpu_memory: (import 'db/redis_cpu_memory.libsonnet' ).new, }, + + identity: { + requests: (import 'identity/requests.libsonnet' ).new, + availability: (import 'identity/availability.libsonnet' ).new, + latency: (import 'identity/latency.libsonnet' ).new, + cache: (import 'identity/cache.libsonnet' ).new, + usage: (import 'identity/usage.libsonnet' ).new, + }, } diff --git a/terraform/variables.tf b/terraform/variables.tf index 61eadb42a..0cbc490f7 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -3,6 +3,11 @@ variable "region" { default = "eu-central-1" } +variable "ecr_app_version" { + description = "The tag of the app image to deploy." + type = string +} + variable "infura_project_id" { type = string } diff --git a/terraform/vars/dev.tfvars b/terraform/vars/dev.tfvars index 64ac193e0..71d9d9756 100644 --- a/terraform/vars/dev.tfvars +++ b/terraform/vars/dev.tfvars @@ -1,2 +1,4 @@ -project_data_cache_ttl = 300 -registry_api_endpoint = "https://registry-prod-cf.walletconnect.com" +project_data_cache_ttl = 300 +registry_api_endpoint = "https://registry-prod-cf.walletconnect.com" +autoscaling_max_instances = 2 +autoscaling_min_instances = 1