diff --git a/PROGRESS.md b/PROGRESS.md index 8db6e91..bad7044 100644 --- a/PROGRESS.md +++ b/PROGRESS.md @@ -29,19 +29,19 @@ - [ ] RPC nodes - [ ] Client - [ ] Create & Deploy Replica Set - - [ ] Bootstrap + - [x] Bootstrap - [ ] Validator (regular) - [ ] RPC nodes - [ ] Client - [ ] Create & Deploy Services - - [ ] Bootstrap + - [x] Bootstrap - [ ] Validator (regular) - [ ] RPC nodes - [ ] Client -- [ ] Check Bootstrap is deployed and running -- [ ] Build and deploy Load Balancer (sits in front of bootstrap and RPC nodes) +- [x] Check Bootstrap is deployed and running +- [x] Build and deploy Load Balancer (sits in front of bootstrap and RPC nodes) - [ ] Add metrics - - [ ] Bootstrap + - [x] Bootstrap - [ ] Validator (regular) - [ ] RPC nodes - [ ] Client @@ -69,7 +69,7 @@ Above, we start with bootstrap, and then we do validators (regular), and then we - Use command line flags to set type of client, tx-count, etc - [ ] Add in kubernetes deployment flags - - [ ] CPU/Memory Requests + - [x] CPU/Memory Requests - [ ] Node Affinity -> Regions - [ ] Node Affinity -> Node Type (Equinix/Lumen) diff --git a/README.md b/README.md index f6da3f4..5c922a4 100644 --- a/README.md +++ b/README.md @@ -59,4 +59,52 @@ cargo run --bin cluster -- --tag # e.g. v1 --base-image # e.g. ubuntu:20.04 --image-name # e.g. 
cluster-image +``` + +## Metrics +1) Setup metrics database: +``` +cd scripts/ +./init-metrics.sh -c +# enter password when prompted +``` +2) add the following to your `cluster` command from above +``` +--metrics-host https://internal-metrics.solana.com # need the `https://` here +--metrics-port 8086 +--metrics-db # from (1) +--metrics-username # from (1) +--metrics-password # from (1) +``` + + +## Kubernetes Cheatsheet +Create namespace: +``` +kubectl create ns +``` + +Delete namespace: +``` +kubectl delete ns +``` + +Get running pods: +``` +kubectl get pods -n +``` + +Get pod logs: +``` +kubectl logs -n +``` + +Exec into pod: +``` +kubectl exec -it -n -- /bin/bash +``` + +Get information about pod: +``` +kubectl describe pod -n ``` \ No newline at end of file diff --git a/scripts/init-metrics.sh b/scripts/init-metrics.sh new file mode 100755 index 0000000..ddd5aaa --- /dev/null +++ b/scripts/init-metrics.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash +set -e + +here=$(dirname "$0") + +# https://gist.github.com/cdown/1163649 +urlencode() { + declare s="$1" + declare l=$((${#s} - 1)) + for i in $(seq 0 $l); do + declare c="${s:$i:1}" + case $c in + [a-zA-Z0-9.~_-]) + echo -n "$c" + ;; + *) + printf '%%%02X' "'$c" + ;; + esac + done +} + +usage() { + exitcode=0 + if [[ -n "$1" ]]; then + exitcode=1 + echo "Error: $*" + fi + cat <) -> Secret { +fn build_secret(name: String, data: BTreeMap) -> Secret { Secret { metadata: ObjectMeta { - name: Some(name.to_string()), + name: Some(name), ..Default::default() }, data: Some(data), @@ -32,7 +33,7 @@ fn build_secret(name: &str, data: BTreeMap) -> Secret { } pub fn create_secret( - secret_name: &str, + secret_name: String, secrets: BTreeMap, ) -> Result> { let data = secrets @@ -119,3 +120,50 @@ pub fn create_replica_set( ..Default::default() }) } + +pub fn create_service( + service_name: String, + namespace: String, + label_selector: BTreeMap, + is_load_balancer: bool, +) -> Service { + Service { + metadata: ObjectMeta { + name: 
Some(service_name), + namespace: Some(namespace), + ..Default::default() + }, + spec: Some(ServiceSpec { + selector: Some(label_selector), + type_: if is_load_balancer { + Some("LoadBalancer".to_string()) + } else { + None + }, + cluster_ip: if is_load_balancer { + None + } else { + Some("None".to_string()) + }, + ports: Some(vec![ + ServicePort { + port: 8899, // RPC Port + name: Some("rpc-port".to_string()), + ..Default::default() + }, + ServicePort { + port: 8001, //Gossip Port + name: Some("gossip-port".to_string()), + ..Default::default() + }, + ServicePort { + port: 9900, //Faucet Port + name: Some("faucet-port".to_string()), + ..Default::default() + }, + ]), + ..Default::default() + }), + ..Default::default() + } +} diff --git a/src/kubernetes.rs b/src/kubernetes.rs index 4c43c18..cc0f050 100644 --- a/src/kubernetes.rs +++ b/src/kubernetes.rs @@ -3,14 +3,14 @@ use { docker::DockerImage, k8s_helpers::{self, SecretType}, validator_config::ValidatorConfig, - ValidatorType, + Metrics, ValidatorType, }, k8s_openapi::{ api::{ apps::v1::ReplicaSet, core::v1::{ - EnvVar, EnvVarSource, Namespace, ObjectFieldSelector, Secret, SecretVolumeSource, - Volume, VolumeMount, + EnvVar, EnvVarSource, Namespace, ObjectFieldSelector, Secret, SecretKeySelector, + SecretVolumeSource, Service, Volume, VolumeMount, }, }, apimachinery::pkg::api::resource::Quantity, @@ -45,6 +45,7 @@ pub struct Kubernetes<'a> { namespace: String, validator_config: &'a mut ValidatorConfig, pod_requests: PodRequests, + pub metrics: Option, } impl<'a> Kubernetes<'a> { @@ -52,12 +53,14 @@ impl<'a> Kubernetes<'a> { namespace: &str, validator_config: &'a mut ValidatorConfig, pod_requests: PodRequests, + metrics: Option, ) -> Kubernetes<'a> { Self { k8s_client: Client::try_default().await.unwrap(), namespace: namespace.to_owned(), validator_config, pod_requests, + metrics, } } @@ -113,7 +116,7 @@ impl<'a> Kubernetes<'a> { }, ); - k8s_helpers::create_secret(secret_name, secrets) + 
k8s_helpers::create_secret(secret_name.to_string(), secrets) } fn add_known_validator(&mut self, pubkey: Pubkey) { @@ -133,7 +136,7 @@ impl<'a> Kubernetes<'a> { secret_name: Option, label_selector: &BTreeMap, ) -> Result> { - let env_vars = vec![EnvVar { + let mut env_vars = vec![EnvVar { name: "MY_POD_IP".to_string(), value_from: Some(EnvVarSource { field_ref: Some(ObjectFieldSelector { @@ -145,6 +148,10 @@ impl<'a> Kubernetes<'a> { ..Default::default() }]; + if self.metrics.is_some() { + env_vars.push(self.get_metrics_env_var_secret()) + } + let accounts_volume = Some(vec![Volume { name: "bootstrap-accounts-volume".into(), secret: Some(SecretVolumeSource { @@ -209,4 +216,105 @@ impl<'a> Kubernetes<'a> { pub fn create_selector(&self, key: &str, value: &str) -> BTreeMap { k8s_helpers::create_selector(key, value) } + + pub async fn deploy_replicas_set( + &self, + replica_set: &ReplicaSet, + ) -> Result { + let api: Api = + Api::namespaced(self.k8s_client.clone(), self.namespace.as_str()); + let post_params = PostParams::default(); + // Apply the ReplicaSet + api.create(&post_params, replica_set).await + } + + pub fn create_bootstrap_service( + &self, + service_name: &str, + label_selector: &BTreeMap, + ) -> Service { + k8s_helpers::create_service( + service_name.to_string(), + self.namespace.clone(), + label_selector.clone(), + false, + ) + } + + pub async fn deploy_service(&self, service: &Service) -> Result { + let post_params = PostParams::default(); + // Create an API instance for Services in the specified namespace + let service_api: Api = + Api::namespaced(self.k8s_client.clone(), self.namespace.as_str()); + + // Create the Service object in the cluster + service_api.create(&post_params, service).await + } + + pub fn create_validator_load_balancer( + &self, + service_name: &str, + label_selector: &BTreeMap, + ) -> Service { + k8s_helpers::create_service( + service_name.to_string(), + self.namespace.clone(), + label_selector.clone(), + true, + ) + } + + pub 
async fn check_replica_set_ready( + &self, + replica_set_name: &str, + ) -> Result { + let replica_sets: Api = + Api::namespaced(self.k8s_client.clone(), self.namespace.as_str()); + let replica_set = replica_sets.get(replica_set_name).await?; + + let desired_validators = replica_set.spec.as_ref().unwrap().replicas.unwrap_or(1); + let available_validators = replica_set + .status + .as_ref() + .unwrap() + .available_replicas + .unwrap_or(0); + + Ok(available_validators >= desired_validators) + } + + pub fn create_metrics_secret(&self) -> Result> { + let mut data = BTreeMap::new(); + if let Some(metrics) = &self.metrics { + data.insert( + "SOLANA_METRICS_CONFIG".to_string(), + SecretType::Value { + v: metrics.to_env_string(), + }, + ); + } else { + return Err( + "Called create_metrics_secret() but metrics were not provided." + .to_string() + .into(), + ); + } + + k8s_helpers::create_secret("solana-metrics-secret".to_string(), data) + } + + pub fn get_metrics_env_var_secret(&self) -> EnvVar { + EnvVar { + name: "SOLANA_METRICS_CONFIG".to_string(), + value_from: Some(EnvVarSource { + secret_key_ref: Some(SecretKeySelector { + name: Some("solana-metrics-secret".to_string()), + key: "SOLANA_METRICS_CONFIG".to_string(), + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + } + } } diff --git a/src/lib.rs b/src/lib.rs index 101aa1f..ebe073a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -56,6 +56,39 @@ pub enum ValidatorType { Client, } +#[derive(Clone, Debug, Default)] +pub struct Metrics { + pub host: String, + pub port: String, + pub database: String, + pub username: String, + password: String, +} + +impl Metrics { + pub fn new( + host: String, + port: String, + database: String, + username: String, + password: String, + ) -> Self { + Metrics { + host, + port, + database, + username, + password, + } + } + pub fn to_env_string(&self) -> String { + format!( + "host={}:{},db={},u={},p={}", + self.host, self.port, self.database, self.username, 
self.password + ) + } +} + pub mod cluster_images; pub mod docker; pub mod genesis; diff --git a/src/main.rs b/src/main.rs index d98abee..bccc954 100644 --- a/src/main.rs +++ b/src/main.rs @@ -18,7 +18,7 @@ use { release::{BuildConfig, BuildType, DeployMethod}, validator::{LabelType, Validator}, validator_config::ValidatorConfig, - EnvironmentConfig, SolanaRoot, ValidatorType, + EnvironmentConfig, Metrics, SolanaRoot, ValidatorType, }, }; @@ -221,6 +221,38 @@ fn parse_matches() -> clap::ArgMatches { Can specify unit here (B, Ki, Mi, Gi, Ti) for bytes, kilobytes, etc (2^N notation) e.g. 1Gi == 1024Mi == 1024Ki == 1,047,576B. [default: 70Gi]"), ) + //Metrics Config + .arg( + Arg::with_name("metrics_host") + .long("metrics-host") + .takes_value(true) + .requires_all(&["metrics_port", "metrics_db", "metrics_username", "metrics_password"]) + .help("Metrics Config. Optional: specify metrics host. e.g. https://internal-metrics.solana.com"), + ) + .arg( + Arg::with_name("metrics_port") + .long("metrics-port") + .takes_value(true) + .help("Metrics Config. Optional: specify metrics port. e.g. 8086"), + ) + .arg( + Arg::with_name("metrics_db") + .long("metrics-db") + .takes_value(true) + .help("Metrics Config. Optional: specify metrics database. e.g. k8s-cluster-"), + ) + .arg( + Arg::with_name("metrics_username") + .long("metrics-username") + .takes_value(true) + .help("Metrics Config. Optional: specify metrics username"), + ) + .arg( + Arg::with_name("metrics_password") + .long("metrics-password") + .takes_value(true) + .help("Metrics Config. 
Optional: Specify metrics password"), + ) .get_matches() } @@ -355,12 +387,24 @@ async fn main() { matches.value_of("memory_requests").unwrap().to_string(), ); + let metrics = matches.value_of("metrics_host").map(|host| { + Metrics::new( + host.to_string(), + matches.value_of("metrics_port").unwrap().to_string(), + matches.value_of("metrics_db").unwrap().to_string(), + matches.value_of("metrics_username").unwrap().to_string(), + matches.value_of("metrics_password").unwrap().to_string(), + ) + }); + let mut kub_controller = Kubernetes::new( environment_config.namespace, &mut validator_config, pod_requests, + metrics, ) .await; + match kub_controller.namespace_exists().await { Ok(true) => (), Ok(false) => { @@ -458,6 +502,24 @@ async fn main() { } } + // metrics secret create once and use by all pods + if kub_controller.metrics.is_some() { + let metrics_secret = match kub_controller.create_metrics_secret() { + Ok(secret) => secret, + Err(err) => { + error!("Failed to create metrics secret! {err}"); + return; + } + }; + match kub_controller.deploy_secret(&metrics_secret).await { + Ok(_) => (), + Err(err) => { + error!("{err}"); + return; + } + } + }; + let bootstrap_validator = cluster_images.bootstrap().expect("should be bootstrap"); match kub_controller.create_bootstrap_secret("bootstrap-accounts-secret", &config_directory) { Ok(secret) => bootstrap_validator.set_secret(secret), @@ -513,4 +575,62 @@ async fn main() { return; } }; + + // deploy bootstrap replica set + match kub_controller + .deploy_replicas_set(bootstrap_validator.replica_set()) + .await + { + Ok(_) => { + info!( + "{} deployed successfully", + bootstrap_validator.replica_set_name() + ); + } + Err(err) => { + error!("Error! Failed to deploy bootstrap validator replicas_set. 
err: {err}"); + return; + } + }; + + // create and deploy bootstrap-service + let bootstrap_service = kub_controller.create_bootstrap_service( + "bootstrap-validator-service", + bootstrap_validator.service_labels(), + ); + match kub_controller.deploy_service(&bootstrap_service).await { + Ok(_) => info!("bootstrap validator service deployed successfully"), + Err(err) => error!("Error! Failed to deploy bootstrap validator service. err: {err}"), + } + + //load balancer service. only create one and use for all bootstrap/rpc nodes + // service selector matches bootstrap selector + let load_balancer_label = + kub_controller.create_selector("load-balancer/name", "load-balancer-selector"); + //create load balancer + let load_balancer = kub_controller + .create_validator_load_balancer("bootstrap-and-rpc-node-lb-service", &load_balancer_label); + + //deploy load balancer + match kub_controller.deploy_service(&load_balancer).await { + Ok(_) => info!("load balancer service deployed successfully"), + Err(err) => error!("Error! Failed to deploy load balancer service. err: {err}"), + } + + // wait for bootstrap replicaset to deploy + while { + match kub_controller + .check_replica_set_ready(bootstrap_validator.replica_set_name().as_str()) + .await + { + Ok(ok) => !ok, // Continue the loop if replica set is not ready: Ok(false) + Err(_) => panic!("Error occurred while checking replica set readiness"), + } + } { + info!( + "replica set: {} not ready...", + bootstrap_validator.replica_set_name() + ); + std::thread::sleep(std::time::Duration::from_secs(1)); + } }